Figure S1

plot 100g100y

GET("http://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20vs%20100.00%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%20100.100%20vs%20100.00%20%2BDate.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 102 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1727771ce4b.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

abc <- glmer(cbind(dieta,dietb) ~ trt + (1|day2), data=pd2, family = "binomial")
cat("Statistics summary")
## Statistics summary
summary(abc)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(dieta, dietb) ~ trt + (1 | day2)
##    Data: pd2
## 
##      AIC      BIC   logLik deviance df.resid 
##  11956.0  11966.6  -5974.0  11948.0      101 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -22.2675  -6.2994  -0.5091   4.9214  30.8589 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  day2   (Intercept) 0.0958   0.3095  
## Number of obs: 105, groups:  day2, 3
## 
## Fixed effects:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -0.27354    0.17956  -1.523    0.128    
## trtaxenic 100g_axenic 100  0.54980    0.02326  23.639   <2e-16 ***
## trtlacto 100g_lacto 100    0.42509    0.02892  14.701   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##              (Intr) trtx100g_100
## trtx100g_100 -0.077             
## trtl100g_100 -0.054  0.396
abc1 <- glht(abc, mcp(trt='Tukey'))
abc2 <- cld(abc1)
pd3e <- data.frame(abc2$lp, abc2$x) %>% group_by(abc2.x) %>% dplyr::summarize(mean = mean(abc2.lp), sem=sd(abc2.lp)/sqrt(sum(abc2.lp<2)))
colors2 <- c("gray","red","blue")

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("aceto 100g_aceto 100" = "At","axenic 100g_axenic 100" = "Ax","lacto 100g_lacto 100" = "Lb"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p100g100y <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
    geom_segment(stat="identity",size=plot_text_size*1.75) + 
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,1))+
    theme_cowplot() + 
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), axis.text = element_blank(), legend.position = "none", axis.ticks = element_blank(), axis.line = element_line(size = 0.2)) +
    geom_errorbar(col = "black", ymax =pd3e$mean+pd3e$sem, ymin = pd3e$mean-pd3e$sem, width=0.25, size=.25) +
    geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","b","c"), y=.75), size=plot_text_size) + coord_flip()
p100g100y

#ggsave(h=2,w=3.5,"100G100Y.png")

plot 75y100g

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g75y%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%20100g75y%20%2BDate.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 73.7 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1726d4855a9.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

abc <- glmer(cbind(dieta,dietb) ~ trt + (1|day2), data=pd2, family = "binomial")
cat("Statistics summary")
## Statistics summary
summary(abc)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(dieta, dietb) ~ trt + (1 | day2)
##    Data: pd2
## 
##      AIC      BIC   logLik deviance df.resid 
##   4052.7   4062.1  -2022.3   4044.7       73 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -15.9803  -5.0553  -0.8682   4.6956  13.9512 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  day2   (Intercept) 0.03824  0.1956  
## Number of obs: 77, groups:  day2, 3
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     0.58502    0.11633   5.029 4.93e-07 ***
## trtaxenic control_axenic trial -0.60834    0.04969 -12.243  < 2e-16 ***
## trtlacto control_lacto trial   -0.68967    0.03661 -18.836  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtxcntrl_t
## trtxcntrl_t -0.165            
## trtlcntrl_t -0.183  0.544
abc1 <- glht(abc, mcp(trt='Tukey'))
abc2 <- cld(abc1)
pd3e <- data.frame(abc2$lp, abc2$x) %>% group_by(abc2.x) %>% dplyr::summarize(mean = mean(abc2.lp), sem=sd(abc2.lp)/sqrt(sum(abc2.lp<2)))
colors2 <- c("gray","red","blue")

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("aceto control_aceto trial" = "At","axenic control_axenic trial" = "ax","lacto control_lacto trial" = "Lp"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("ax","At","Lp"))

plot_text_size = 3
p75y100g <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
    geom_segment(stat="identity",size=plot_text_size*1.75) + 
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,1))+
    theme_cowplot() + 
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), axis.text = element_blank(), legend.position = "none", axis.ticks = element_blank(), axis.line = element_line(size = 0.2)) +
    geom_errorbar(col = "black", ymax =pd3e$mean+pd3e$sem, ymin = pd3e$mean-pd3e$sem, width=0.25, size=.25) +
    geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("b","a","a"), y=.75), size=plot_text_size) + coord_flip()
p75y100g

#ggsave(h=2,w=3.5,"75y100g.png")

50g100y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2050g100y%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%2050g100y%20%2BDate.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 104 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1727e26e00f.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

abc <- glmer(cbind(dieta,dietb) ~ trt + (1|day2), data=pd2, family = "binomial")
cat("Statistics summary")
## Statistics summary
summary(abc)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(dieta, dietb) ~ trt + (1 | day2)
##    Data: pd2
## 
##      AIC      BIC   logLik deviance df.resid 
##   8518.0   8528.8  -4255.0   8510.0      106 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -27.4750  -5.7871  -0.1475   3.6774  20.8372 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  day2   (Intercept) 0.2305   0.4801  
## Number of obs: 110, groups:  day2, 3
## 
## Fixed effects:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                0.22923    0.27842   0.823    0.410    
## trtaxenic 100g_axenic 50g  0.04887    0.03384   1.444    0.149    
## trtlacto 100g_lacto 50g    0.49750    0.04070  12.223   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtx100g_50
## trtx100g_50 -0.071            
## trtl100g_50 -0.060  0.496
abc1 <- glht(abc, mcp(trt='Tukey'))
abc2 <- cld(abc1)
pd3e <- data.frame(abc2$lp, abc2$x) %>% group_by(abc2.x) %>% dplyr::summarize(mean = mean(abc2.lp), sem=sd(abc2.lp)/sqrt(sum(abc2.lp<2)))
colors2 <- c("gray","red","blue")

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("aceto 100g_aceto 50g" = "At","axenic 100g_axenic 50g" = "Ax","lacto 100g_lacto 50g" = "Lb"))
pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p50g100y <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
    geom_segment(stat="identity",size=plot_text_size*1.75) + 
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,1))+
    theme_cowplot() + 
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), axis.text = element_blank(), legend.position = "none", axis.ticks = element_blank(), axis.line = element_line(size = 0.2)) +
    geom_errorbar(col = "black", ymax =pd3e$mean+pd3e$sem, ymin = pd3e$mean-pd3e$sem, width=0.25, size=.25) +
    geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","a","b"), y=.75), size=plot_text_size) + coord_flip()

p50g100y

#ggsave(h=2,w=3.5,"50G100Y.png")

25g100y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2025g100y%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%2025g100y%20%2BDate.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 102 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1725e7e5336.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

abc <- glmer(cbind(dieta,dietb) ~ trt + (1|day2), data=pd2, family = "binomial")
cat("Statistics summary")
## Statistics summary
summary(abc)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(dieta, dietb) ~ trt + (1 | day2)
##    Data: pd2
## 
##      AIC      BIC   logLik deviance df.resid 
##   6920.1   6930.8  -3456.1   6912.1      103 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -29.6911  -2.9387   0.5358   3.2319  18.5342 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  day2   (Intercept) 0.1271   0.3566  
## Number of obs: 107, groups:  day2, 3
## 
## Fixed effects:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                0.95165    0.20850   4.564 5.02e-06 ***
## trtaxenic 100g_axenic 25g  0.08856    0.04448   1.991   0.0465 *  
## trtlacto 100g_lacto 25g   -0.37809    0.03966  -9.533  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtx100g_25
## trtx100g_25 -0.118            
## trtl100g_25 -0.130  0.602
abc1 <- glht(abc, mcp(trt='Tukey'))
abc2 <- cld(abc1)
pd3e <- data.frame(abc2$lp, abc2$x) %>% group_by(abc2.x) %>% dplyr::summarize(mean = mean(abc2.lp), sem=sd(abc2.lp)/sqrt(sum(abc2.lp<2)))
colors2 <- c("gray","red","blue")

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("aceto 100g_aceto 25g" = "At","axenic 100g_axenic 25g" = "Ax","lacto 100g_lacto 25g" = "Lb"))
pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p25g100y <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
    geom_segment(stat="identity",size=plot_text_size*1.75) + 
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,1))+
    theme_cowplot() + 
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), axis.text = element_blank(), legend.position = "none", axis.ticks = element_blank(), axis.line = element_line(size = 0.2)) +
    geom_errorbar(col = "black", ymax =pd3e$mean+pd3e$sem, ymin = pd3e$mean-pd3e$sem, width=0.25, size=.25) +
    geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","a","b"), y=.85), size=plot_text_size) + coord_flip()
p25g100y

#ggsave(h=2,w=3.5,"25G100Y.png")

75g100y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2075g100y%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%2075g100y%20%2BDate.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 105 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172f5882f3.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

abc <- glmer(cbind(dieta,dietb) ~ trt + (1|day2), data=pd2, family = "binomial")
cat("Statistics summary")
## Statistics summary
summary(abc)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(dieta, dietb) ~ trt + (1 | day2)
##    Data: pd2
## 
##      AIC      BIC   logLik deviance df.resid 
##  11762.0  11772.8  -5877.0  11754.0      105 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -20.512  -4.551   0.928   7.026  28.547 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  day2   (Intercept) 0.2505   0.5005  
## Number of obs: 109, groups:  day2, 3
## 
## Fixed effects:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)               -0.306905   0.289588  -1.060    0.289    
## trtaxenic 100g_axenic 100 -0.229201   0.026267  -8.726   <2e-16 ***
## trtlacto 100g_lacto 100   -0.005985   0.028115  -0.213    0.831    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##              (Intr) trtx100g_100
## trtx100g_100 -0.047             
## trtl100g_100 -0.045  0.498
abc1 <- glht(abc, mcp(trt='Tukey'))
abc2 <- cld(abc1)
pd3e <- data.frame(abc2$lp, abc2$x) %>% group_by(abc2.x) %>% dplyr::summarize(mean = mean(abc2.lp), sem=sd(abc2.lp)/sqrt(sum(abc2.lp<2)))
colors2 <- c("gray","red","blue")

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("aceto 100g_aceto 100" = "At","axenic 100g_axenic 100" = "Ax","lacto 100g_lacto 100" = "Lb"))
pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p100y75g <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
    geom_segment(stat="identity",size=plot_text_size*1.75) + 
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,1))+
    theme_cowplot() + 
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), axis.text = element_blank(), legend.position = "none", axis.ticks = element_blank(), axis.line = element_line(size = 0.2)) +
    geom_errorbar(col = "black", ymax =pd3e$mean+pd3e$sem, ymin = pd3e$mean-pd3e$sem, width=0.25, size=.25) +
    geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("b","a","b"), y=.75), size=plot_text_size) + coord_flip()
p100y75g

#ggsave(h=2,w=3.5,"100y75g.png")

100g50y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g50y%20+Data.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%20100g50y%20%2BData.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 74.2 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1722925142d.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

abc <- glmer(cbind(dieta,dietb) ~ trt + (1|day2), data=pd2, family = "binomial")
cat("Statistics summary")
## Statistics summary
summary(abc)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(dieta, dietb) ~ trt + (1 | day2)
##    Data: pd2
## 
##      AIC      BIC   logLik deviance df.resid 
##   6426.8   6436.1  -3209.4   6418.8       72 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -22.1964  -5.3314  -0.5442   4.2209  25.1995 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  day2   (Intercept) 0.54     0.7349  
## Number of obs: 76, groups:  day2, 4
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -0.64709    0.36881  -1.755   0.0793 .  
## trtaxenic control_axenic trial -0.28125    0.05449  -5.162 2.44e-07 ***
## trtlacto control_lacto trial    1.08561    0.02983  36.390  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) trtxcntrl_t
## trtxcntrl_t -0.072            
## trtlcntrl_t -0.063  0.548
abc1 <- glht(abc, mcp(trt='Tukey'))
abc2 <- cld(abc1)
pd3e <- data.frame(abc2$lp, abc2$x) %>% group_by(abc2.x) %>% dplyr::summarize(mean = mean(abc2.lp), sem=sd(abc2.lp)/sqrt(sum(abc2.lp<2)))
colors2 <- c("gray","red","blue")

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("aceto control_aceto trial" = "At","axenic control_axenic trial" = "Ax","lacto control_lacto trial" = "Lb"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p100g50y <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
    geom_segment(stat="identity",size=plot_text_size*1.75) + 
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,1))+
    theme_cowplot() + 
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), axis.text = element_blank(), legend.position = "none", axis.ticks = element_blank(), axis.line = element_line(size = 0.2)) +
    geom_errorbar(col = "black", ymax =pd3e$mean+pd3e$sem, ymin = pd3e$mean-pd3e$sem, width=0.25, size=.25) +
    geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("b","a","c"), y=.75), size=plot_text_size) + coord_flip()

p100g50y

#ggsave(h=2,w=3.5,"100G50Y.png")

100g25y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g25y%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%20100g25y%20%2BDate.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 81.9 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1725581afd2.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

abc <- glmer(cbind(dieta,dietb) ~ trt + (1|day2), data=pd2, family = "binomial")
cat("Statistics summary")
## Statistics summary
summary(abc)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(dieta, dietb) ~ trt + (1 | day2)
##    Data: pd2
## 
##      AIC      BIC   logLik deviance df.resid 
##   9709.1   9718.6  -4850.5   9701.1       76 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -20.9934  -6.0003   0.3233   5.8365  20.7788 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  day2   (Intercept) 0.01364  0.1168  
## Number of obs: 80, groups:  day2, 3
## 
## Fixed effects:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       -0.13532    0.07004  -1.932   0.0534 .  
## trtaxenic yeast100_axenic yeast25  0.28870    0.03485   8.285   <2e-16 ***
## trtlacto yeast100_lacto yeast25   -0.21559    0.02414  -8.929   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                  (Intr) trtxyst100_yst25
## trtxyst100_yst25 -0.169                 
## trtlyst100_yst25 -0.203  0.424
abc1 <- glht(abc, mcp(trt='Tukey'))
abc2 <- cld(abc1)
pd3e <- data.frame(abc2$lp, abc2$x) %>% group_by(abc2.x) %>% dplyr::summarize(mean = mean(abc2.lp), sem=sd(abc2.lp)/sqrt(sum(abc2.lp<2)))
colors2 <- c("gray","red","blue")

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("aceto yeast100_aceto yeast25" = "At","axenic yeast100_axenic yeast25" = "Ax","lacto yeast100_lacto yeast25" = "Lb"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p100g25y <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
    geom_segment(stat="identity",size=plot_text_size*1.75) + 
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,1))+
    theme_cowplot() + 
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), axis.text = element_blank(), legend.position = "none", axis.ticks = element_blank(), axis.line = element_line(size = 0.2)) +
    geom_errorbar(col = "black", ymax =pd3e$mean+pd3e$sem, ymin = pd3e$mean-pd3e$sem, width=0.25, size=.25) +
    geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","b","c"), y=.75), size=plot_text_size) + coord_flip()

p100g25y

#ggsave(h=2,w=3.5,"100G25Y.png")

50g50y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g100y%20vs%2050g50y%20+Date_JMC.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%20100g100y%20vs%2050g50y%20%2BDate_JMC.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 75.8 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1725b76ade.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

abc <- glmer(cbind(dieta,dietb) ~ trt + (1|day2), data=pd2, family = "binomial")
cat("Statistics summary")
## Statistics summary
summary(abc)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(dieta, dietb) ~ trt + (1 | day2)
##    Data: pd2
## 
##      AIC      BIC   logLik deviance df.resid 
##   6690.7   6699.4  -3341.3   6682.7       61 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -19.9066  -5.1175   0.2775   7.7047  22.0951 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  day2   (Intercept) 0.01897  0.1377  
## Number of obs: 65, groups:  day2, 3
## 
## Fixed effects:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       -0.31468    0.10061  -3.128  0.00176 ** 
## trtaxenic ratio100_axenic ratio50  0.44075    0.17324   2.544  0.01095 *  
## trtlacto ratio100_lacto ratio50    0.32748    0.03305   9.908  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                (Intr) trtxrt100_rt50
## trtxrt100_rt50 -0.581               
## trtlrt100_rt50 -0.192  0.112
abc1 <- glht(abc, mcp(trt='Tukey'))
abc2 <- cld(abc1)
pd3e <- data.frame(abc2$lp, abc2$x) %>% group_by(abc2.x) %>% dplyr::summarize(mean = mean(abc2.lp), sem=sd(abc2.lp)/sqrt(sum(abc2.lp<2)))
colors2 <- c("gray","red","blue")

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("aceto ratio100_aceto ratio50" = "At","axenic ratio100_axenic ratio50" = "Ax","lacto ratio100_lacto ratio50" = "Lb"))
pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p50g50y <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
    geom_segment(stat="identity",size=plot_text_size*1.75) + 
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,1))+
    theme_cowplot() + 
#   theme(text = element_text(size=plot_text_size), axis.title = element_blank(), axis.text.x = element_text(angle = 45, hjust=1), legend.position = "none") +
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), axis.text = element_blank(), legend.position = "none", axis.ticks = element_blank(), axis.line = element_line(size = 0.2)) +
    geom_errorbar(col = "black", ymax =pd3e$mean+pd3e$sem, ymin = pd3e$mean-pd3e$sem, width=0.25, size=.25) +
#   labs(x="treatment",y = "Preference index") + 
#  annotate(geom="text", x= 1, y= 0.0, label = "10%Y 10%G",color="black", size=4, hjust = 0, fill="white", label.size=0, angle = 90) +
#  annotate(geom="text", x= 1, y= 1, label = "10%Y 10%G",color="black", size=4, hjust = 0, fill="white", label.size=0, angle = 90) + 
    geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("b","a","a"), y=.75), size=plot_text_size) + coord_flip()

p50g50y

#ggsave(h=2,w=3.5,"50G50Y.png")

50g25y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20to%2050.25+Date_JMC.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%20100.100%20to%2050.25%2BDate_JMC.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 58.9 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172263665a2.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
    droplevels()

pd2 <- pref_data
pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

abc <- glmer(cbind(dieta,dietb) ~ trt + (1|day2), data=pd2, family = "binomial")
cat("Statistics summary")
## Statistics summary
summary(abc)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(dieta, dietb) ~ trt + (1 | day2)
##    Data: pd2
## 
##      AIC      BIC   logLik deviance df.resid 
##   5098.2   5104.6  -2545.1   5090.2       33 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -21.4700  -4.9480   0.5856  10.4811  20.0481 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  day2   (Intercept) 0.008709 0.09332 
## Number of obs: 37, groups:  day2, 2
## 
## Fixed effects:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                      -1.24521    0.07998  -15.57   <2e-16 ***
## trtaxenic 100to100_axenic 50to25  0.97993    0.05506   17.80   <2e-16 ***
## trtlacto 100to100_lacto 50to25    1.10690    0.04894   22.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                   (Intr) trtx100t100_50t25
## trtx100t100_50t25 -0.469                  
## trtl100t100_50t25 -0.472  0.682
abc1 <- glht(abc, mcp(trt='Tukey'))
abc2 <- cld(abc1)
pd3e <- data.frame(abc2$lp, abc2$x) %>% group_by(abc2.x) %>% dplyr::summarize(mean = mean(abc2.lp), sem=sd(abc2.lp)/sqrt(sum(abc2.lp<2)))
colors2 <- c("gray","red","blue")

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("aceto 100to100_aceto 50to25" = "At","axenic 100to100_axenic 50to25" = "Ax","lacto 100to100_lacto 50to25" = "Lb"))
pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p50g25y <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
    geom_segment(stat="identity",size=plot_text_size*1.75) + 
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,1))+
    theme_cowplot() + 
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), axis.text = element_blank(), legend.position = "none", axis.ticks = element_blank(), axis.line = element_line(size = 0.2)) +
    geom_errorbar(col = "black", ymax =pd3e$mean+pd3e$sem, ymin = pd3e$mean-pd3e$sem, width=0.25, size=.25) +
    geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","b","c"), y=.75), size=plot_text_size) + coord_flip()

p50g25y

#ggsave(h=2,w=3.5,"50G25Y.png")

Build the plot

pyaxis <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
  geom_blank() + 
  theme_cowplot() + 
  ylim(c(0,1)) + 
  coord_flip()+
  theme(axis.title = element_blank(), axis.line.x =element_blank(), axis.ticks.x=element_blank(), axis.text.x= element_blank(),axis.ticks.y = element_line(size = 0.2), axis.line.y = element_line(size = 0.2), axis.text.y = element_text(size = 6))

pxaxis <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
  geom_blank() + 
  theme_cowplot() + 
  ylim(c(0,1)) + 
  coord_flip()+
  theme(axis.title = element_blank(), axis.line.y =element_blank(), axis.ticks.y=element_blank(), axis.text.y= element_blank(),axis.ticks.x = element_line(size = 0.2), axis.line.x = element_line(size = 0.2), axis.text.x = element_text(angle = 45, hjust = 1, size = 6))

label_size = 4
l100g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "100g"), size = label_size)
l75g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "75g"), size = label_size)
l50g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "50g"), size = label_size)
l25g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "25g"), size = label_size)
l100y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "100g"), size = label_size)
l75y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "75g"), size = label_size)
l50y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "50g"), size = label_size)
l25y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "25g"), size = label_size)
ly <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Yeast"), size = label_size*1.25, angle = 90)
lg <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Glucose"), size = label_size*1.25)
ltrt <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Bacterial treatment"), size = label_size*.75, angle = 90)
lfp <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Preference index"), size = label_size*.75)
lcontrol <- ggplot() + theme_void() + geom_text(aes(0,0,label = "prefers control diet"), size = label_size*.75, angle = 90, color = "green")
ltrial <- ggplot() + theme_void() + geom_text(aes(0,0,label = "prefers trial diet"), size = label_size*.75, angle = 90, color = "green")

jpeg(h=4.5, w=6, "plot_all_2021-01-14.jpg", units = "in", res = 300)
  grid.arrange( 
        l25g, l50g, l75g, l100g, ## 1,2,3,4
        l25y,p50g25y,p100g25y,
        l50y,p50g50y,p100g50y,
        l75y,p75y100g,
        l100y,p25g100y,p50g100y,p100y75g,p100g100y,
        pxaxis,pyaxis,pxaxis,pyaxis,pxaxis,pyaxis,pxaxis,pyaxis, ## 20-25
        lg, ly, ltrt, lfp, ## 26, 27, 28, 29
        lcontrol,ltrial,
        widths = c(.3,.4,.2,.2,.2,1,1,1,1,.2),  
        heights = c(.3,.4,1,1,1,1,.3,.2),   
        layout_matrix=rbind(    
          c(NA,NA,NA,NA,NA,26,26,26,26,NA),
            c(NA,NA,NA,NA,NA,1, 2, 3, 4, NA),       
            c(27,5, 28,19,31,NA,6, NA,7, 30),
            c(27,8, 28,21,31,NA,9, NA,10,30),
            c(27,11,28,23,31,NA,NA,NA,12,30),
            c(27,13,28,25,31,14,15,16,17,30),
            c(NA,NA,NA,NA,NA,18,20,22,24,NA),
            c(NA,NA,NA,NA,NA,29,29,29,29,NA)
            )
        )
dev.off()

Figure S2A total sips ok

plot 100g100y

GET("http://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20vs%20100.00%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%20100.100%20vs%20100.00%20%2BDate.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 102 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea17274129fea.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
  mutate(total = dieta+dietb) %>%
    droplevels()

pd2 <- pref_data

cat("KW test result")
## KW test result
kruskal.test(pd2$total ~ pd2$trt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pd2$total by pd2$trt
## Kruskal-Wallis chi-squared = 0.95834, df = 2, p-value = 0.6193
efg <- dunn.test(pd2$total, pd2$trt, method = "bh", table = F, kw=F)
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
cat("N per group")
## N per group
table(pd2$trt)
## 
##   aceto 100g_aceto 100 axenic 100g_axenic 100   lacto 100g_lacto 100 
##                     31                     55                     19
pd3e <- pd2 %>%
  group_by(trt) %>%
  summarize(mean = mean(total), sem= sd(total)/sqrt(dplyr::n())) %>%
  ungroup()

pd3e$abc2.x <- plyr::revalue(pd3e$trt, c("aceto 100g_aceto 100" = "At","axenic 100g_axenic 100" = "Ax","lacto 100g_lacto 100" = "Lb"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p100g100y <- ggplot(pd3e, aes( x=abc2.x, y = mean,fill = abc2.x)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,750))+
    theme_cowplot() +
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_blank(), axis.ticks = element_blank()) +
    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=.25) +
#   geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","a","a"), y=mean + sem + 80), size=plot_text_size)
p100g100y

#ggsave(h=2,w=3.5,"100G100Y.png")

plot 75y100g

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g75y%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%20100g75y%20%2BDate.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 73.7 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1725aa427dd.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
  mutate(total = dieta+dietb) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

cat("KW test result")
## KW test result
kruskal.test(pd2$total ~ pd2$trt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pd2$total by pd2$trt
## Kruskal-Wallis chi-squared = 2.5936, df = 2, p-value = 0.2734
efg <- dunn.test(pd2$total, pd2$trt, method = "bh", table = F, kw=F)
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
cat("N per group")
## N per group
table(pd2$trt)
## 
##   aceto control_aceto trial axenic control_axenic trial 
##                          27                          21 
##   lacto control_lacto trial 
##                          29
pd3e <- pd2 %>%
  group_by(trt) %>%
  summarize(mean = mean(total), sem= sd(total)/sqrt(dplyr::n())) %>%
  ungroup()

pd3e$abc2.x <- plyr::revalue(pd3e$trt, c("aceto control_aceto trial" = "At","axenic control_axenic trial" = "Ax","lacto control_lacto trial" = "Lb"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p75y100g <- ggplot(pd3e, aes( x=abc2.x, y = mean,fill = abc2.x)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,750))+
    theme_cowplot() +
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_blank(), axis.ticks = element_blank()) +
    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=.25) +
#   geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","a","a"), y=mean + sem + 80), size=plot_text_size)

p75y100g

#ggsave(h=2,w=3.5,"75y100g.png")

50g100y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2050g100y%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%2050g100y%20%2BDate.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 104 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172343bd238.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
  mutate(total = dieta+dietb) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

cat("KW test result")
## KW test result
kruskal.test(pd2$total ~ pd2$trt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pd2$total by pd2$trt
## Kruskal-Wallis chi-squared = 7.4968, df = 2, p-value = 0.02356
efg <- dunn.test(pd2$total, pd2$trt, method = "bh", table = F, kw=F)
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
ghi <- cldList(comparison = efg$comparisons, p.value = efg$P.adjusted, threshold = 0.05)
cat("compact letter displays")
## compact letter displays
ghi
##               Group Letter MonoLetter
## 1   aceto1g_aceto5g      a         a 
## 2 axenic1g_axenic5g      b          b
## 3   lacto1g_lacto5g     ab         ab
cat("N per group")
## N per group
table(pd2$trt)
## 
##   aceto 100g_aceto 50g axenic 100g_axenic 50g   lacto 100g_lacto 50g 
##                     27                     57                     26
pd3e <- pd2 %>%
  group_by(trt) %>%
  summarize(mean = mean(total), sem= sd(total)/sqrt(dplyr::n())) %>%
  ungroup()

pd3e$abc2.x <- plyr::revalue(pd3e$trt, c("aceto 100g_aceto 50g" = "At","axenic 100g_axenic 50g" = "Ax","lacto 100g_lacto 50g" = "Lb"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p50g100y <- ggplot(pd3e, aes( x=abc2.x, y = mean,fill = abc2.x)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,750))+
    theme_cowplot() +
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_blank(), axis.ticks = element_blank()) +
    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=.25) +
#   geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","b","ab"), y=mean + sem + 80), size=plot_text_size)

p50g100y

#ggsave(h=2,w=3.5,"50G100Y.png")

25g100y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2025g100y%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%2025g100y%20%2BDate.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 102 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1727ba07d0f.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
  mutate(total = dieta+dietb) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

cat("KW test result")
## KW test result
kruskal.test(pd2$total ~ pd2$trt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pd2$total by pd2$trt
## Kruskal-Wallis chi-squared = 17.677, df = 2, p-value = 0.000145
efg <- dunn.test(pd2$total, pd2$trt, method = "bh", table = F, kw=F)
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
ghi <- cldList(comparison = efg$comparisons, p.value = efg$P.adjusted, threshold = 0.05)
cat("compact letter displays")
## compact letter displays
ghi
##                Group Letter MonoLetter
## 1   aceto1g_aceto25g      a         a 
## 2 axenic1g_axenic25g      a         a 
## 3   lacto1g_lacto25g      b          b
cat("N per group")
## N per group
table(pd2$trt)
## 
##   aceto 100g_aceto 25g axenic 100g_axenic 25g   lacto 100g_lacto 25g 
##                     28                     57                     22
pd3e <- pd2 %>%
 # group_by(trt, day) %>%
#  summarize(total = mean(total)) %>%
#  ungroup() %>%
  group_by(trt) %>%
  summarize(mean = mean(total), sem= sd(total)/sqrt(dplyr::n())) %>%
  ungroup()

pd3e$abc2.x <- plyr::revalue(pd3e$trt, c("aceto 100g_aceto 25g" = "At","axenic 100g_axenic 25g" = "Ax","lacto 100g_lacto 25g" = "Lb"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p25g100y <- ggplot(pd3e, aes( x=abc2.x, y = mean,fill = abc2.x)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,750))+
    theme_cowplot() +
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_blank(), axis.ticks = element_blank()) +
    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=.25) +
#   geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","a","b"), y=mean + sem + 80), size=plot_text_size)
p25g100y

#ggsave(h=2,w=3.5,"25G100Y.png")

75g100y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2075g100y%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%2075g100y%20%2BDate.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 105 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172493819ff.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    filter(dieta<1000 & dietb<1000) %>%
  mutate(total = dieta+dietb) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

cat("KW test result")
## KW test result
kruskal.test(pd2$total ~ pd2$trt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pd2$total by pd2$trt
## Kruskal-Wallis chi-squared = 7.6251, df = 2, p-value = 0.02209
efg <- dunn.test(pd2$total, pd2$trt, method = "bh", table = F, kw=F)
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
ghi <- cldList(comparison = efg$comparisons, p.value = efg$P.adjusted, threshold = 0.05)
cat("compact letter displays")
## compact letter displays
ghi
##              Group Letter MonoLetter
## 1   aceto1g_aceto1      a         a 
## 2 axenic1g_axenic1      b          b
## 3   lacto1g_lacto1      a         a
pd3e <- pd2 %>%
  group_by(trt) %>%
  summarize(mean = mean(total), sem= sd(total)/sqrt(dplyr::n())) %>%
  ungroup()

cat("N per group")
## N per group
table(pd2$trt)
## 
##   aceto 100g_aceto 100 axenic 100g_axenic 100   lacto 100g_lacto 100 
##                     29                     52                     28
pd3e$abc2.x <- plyr::revalue(pd3e$trt, c("aceto 100g_aceto 100" = "At","axenic 100g_axenic 100" = "Ax","lacto 100g_lacto 100" = "Lb"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p100y75g <- ggplot(pd3e, aes( x=abc2.x, y = mean,fill = abc2.x)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,750))+
    theme_cowplot() +
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_blank(), axis.ticks = element_blank()) +
    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=.25) +
#   geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","b","a"), y=mean + sem + 80), size=plot_text_size)
p100y75g

#ggsave(h=2,w=3.5,"100y75g.png")

100g50y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g50y%20+Data.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%20100g50y%20%2BData.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 74.2 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172587fcdd1.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    filter(dieta<1000 & dietb<1000) %>%
  mutate(total = dieta+dietb) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

cat("KW test result")
## KW test result
kruskal.test(pd2$total ~ pd2$trt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pd2$total by pd2$trt
## Kruskal-Wallis chi-squared = 15.747, df = 2, p-value = 0.0003808
efg <- dunn.test(pd2$total, pd2$trt, method = "bh", table = F, kw=F)
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
ghi <- cldList(comparison = efg$comparisons, p.value = efg$P.adjusted, threshold = 0.05)
cat("compact letter displays")
## compact letter displays
ghi
##                       Group Letter MonoLetter
## 1   acetocontrol_acetotrial      a         a 
## 2 axeniccontrol_axenictrial      b          b
## 3   lactocontrol_lactotrial      a         a
pd3e <- pd2 %>%
  group_by(trt) %>%
  summarize(mean = mean(total), sem= sd(total)/sqrt(dplyr::n())) %>%
  ungroup()

cat("N per group")
## N per group
table(pd2$trt)
## 
##   aceto control_aceto trial axenic control_axenic trial 
##                          19                          27 
##   lacto control_lacto trial 
##                          30
pd3e$abc2.x <- plyr::revalue(pd3e$trt, c("aceto control_aceto trial" = "At","axenic control_axenic trial" = "Ax","lacto control_lacto trial" = "Lb"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))
 pd3e
## # A tibble: 3 × 4
##   trt                          mean   sem abc2.x
##   <fct>                       <dbl> <dbl> <fct> 
## 1 aceto control_aceto trial    602.  73.6 At    
## 2 axenic control_axenic trial  264.  51.8 Ax    
## 3 lacto control_lacto trial    489.  53.0 Lb
plot_text_size = 3
p100g50y <- ggplot(pd3e, aes( x=abc2.x, y = mean,fill = abc2.x)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,750))+
    theme_cowplot() +
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_blank(), axis.ticks = element_blank()) +
    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=.25) +
#   geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","b","a"), y=c(602+74+70, 264+52+80, 489+53+80)), size=plot_text_size)

p100g50y

#ggsave(h=2,w=3.5,"100G50Y.png")

100g25y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g25y%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%20100g25y%20%2BDate.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 81.9 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1726d4c33e7.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    filter(dieta<1000 & dietb<1000) %>%
  mutate(total = dieta+dietb) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

cat("KW test result")
## KW test result
kruskal.test(pd2$total ~ pd2$trt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pd2$total by pd2$trt
## Kruskal-Wallis chi-squared = 3.5756, df = 2, p-value = 0.1673
efg <- dunn.test(pd2$total, pd2$trt, method = "bh", table = F, kw=F)
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
pd3e <- pd2 %>%
  group_by(trt) %>%
  summarize(mean = mean(total), sem= sd(total)/sqrt(dplyr::n())) %>%
  ungroup()

cat("N per group")
## N per group
table(pd2$trt)
## 
##   aceto yeast100_aceto yeast25 axenic yeast100_axenic yeast25 
##                             29                             22 
##   lacto yeast100_lacto yeast25 
##                             29
pd3e$abc2.x <- plyr::revalue(pd3e$trt, c("aceto yeast100_aceto yeast25" = "At","axenic yeast100_axenic yeast25" = "Ax","lacto yeast100_lacto yeast25" = "Lb"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p100g25y <- ggplot(pd3e, aes( x=abc2.x, y = mean,fill = abc2.x)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,750))+
    theme_cowplot() +
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_blank(), axis.ticks = element_blank()) +
    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=.25) +
#   geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","a","a"), y=mean + sem + 80), size=plot_text_size)

p100g25y

#ggsave(h=2,w=3.5,"100G25Y.png")

50g50y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g100y%20vs%2050g50y%20+Date_JMC.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%20100g100y%20vs%2050g50y%20%2BDate_JMC.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 75.8 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1729f9a6af.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    filter(dieta<1000 & dietb<1000) %>%
  mutate(total = dieta+dietb) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))
cat("KW test result")
## KW test result
kruskal.test(pd2$total ~ pd2$trt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pd2$total by pd2$trt
## Kruskal-Wallis chi-squared = 1.9274, df = 2, p-value = 0.3815
pd3e <- pd2 %>%
  group_by(trt) %>%
  summarize(mean = mean(total), sem= sd(total)/sqrt(dplyr::n())) %>%
  ungroup()

cat("N per group")
## N per group
table(pd2$trt)
## 
##   aceto ratio100_aceto ratio50 axenic ratio100_axenic ratio50 
##                             21                             19 
##   lacto ratio100_lacto ratio50 
##                             25
pd3e$abc2.x <- plyr::revalue(pd3e$trt, c("aceto ratio100_aceto ratio50" = "At","axenic ratio100_axenic ratio50" = "Ax","lacto ratio100_lacto ratio50" = "Lb"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p50g50y <- ggplot(pd3e, aes( x=abc2.x, y = mean,fill = abc2.x)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,750))+
    theme_cowplot() +
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_blank(), axis.ticks = element_blank()) +
    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=.25) +
#   geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","a","a"), y=mean + sem + 80), size=plot_text_size)

p50g50y

#ggsave(h=2,w=3.5,"50G50Y.png")

50g25y

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20to%2050.25+Date_JMC.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat%20100.100%20to%2050.25%2BDate_JMC.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 58.9 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1723b63778f.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "Date")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    filter(dieta<1000 & dietb<1000) %>%
  mutate(total = dieta+dietb) %>%
    droplevels()

pd2 <- pref_data

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

cat("KW test result")
## KW test result
kruskal.test(pd2$total ~ pd2$trt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pd2$total by pd2$trt
## Kruskal-Wallis chi-squared = 0.34876, df = 2, p-value = 0.84
pd3e <- pd2 %>%
  group_by(trt) %>%
  summarize(mean = mean(total), sem= sd(total)/sqrt(dplyr::n())) %>%
  ungroup()

cat("N per group")
## N per group
table(pd2$trt)
## 
##   aceto 100to100_aceto 50to25 axenic 100to100_axenic 50to25 
##                             8                            10 
##   lacto 100to100_lacto 50to25 
##                            19
pd3e$abc2.x <- plyr::revalue(pd3e$trt, c("aceto 100to100_aceto 50to25" = "At","axenic 100to100_axenic 50to25" = "Ax","lacto 100to100_lacto 50to25" = "Lb"))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax","At","Lb"))

plot_text_size = 3
p50g25y <- ggplot(pd3e, aes( x=abc2.x, y = mean,fill = abc2.x)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
  scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,750))+
    theme_cowplot() +
    theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_blank(), axis.ticks = element_blank()) +
    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=.25) +
#   geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=c("a","a","a"), y=mean + sem + 80), size=plot_text_size)

p50g25y

#ggsave(h=2,w=3.5,"50G25Y.png")

Build the plot

pyaxis <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
  geom_blank() + 
  theme_cowplot() + 
  ylim(c(0,750)) + 
  theme(axis.title = element_blank(), axis.line.x =element_blank(), axis.ticks.x=element_blank(), axis.text.x= element_blank(),axis.ticks.y = element_line(size = 0.2), axis.line.y = element_line(size = 0.2), axis.text.y = element_text(size = 6))

pxaxis <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
  geom_blank() + 
  theme_cowplot() + 
  ylim(c(0,750)) + 
    theme(axis.title = element_blank(), axis.line.y =element_blank(), axis.ticks.y=element_blank(), axis.text.y= element_blank(),axis.ticks.x = element_line(size = 0.2), axis.line.x = element_line(size = 0.2), axis.text.x = element_text(angle = 45, hjust = 1, size = 6))

label_size = 4
l100g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "100g"), size = label_size)
l75g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "75g"), size = label_size)
l50g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "50g"), size = label_size)
l25g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "25g"), size = label_size)
l100y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "100g"), size = label_size)
l75y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "75g"), size = label_size)
l50y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "50g"), size = label_size)
l25y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "25g"), size = label_size)
ly <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Yeast"), size = label_size*1.25, angle = 90)
lg <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Glucose"), size = label_size*1.25)
ltrt <- ggplot() + theme_void() + geom_text(aes(0,0,label = "# of sips"), size = label_size*.75, angle = 90)
lfp <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Bacterial treatment"), size = label_size*.75)
lcontrol <- ggplot() + theme_void() + geom_text(aes(0,0,label = "prefers control diet"), size = label_size*.75, angle = 90, color = "green")
ltrial <- ggplot() + theme_void() + geom_text(aes(0,0,label = "prefers trial diet"), size = label_size*.75, angle = 90, color = "green")

jpeg(h=4.5, w=6, "plot_all_totals_2021-01-14.jpg", units = "in", res = 300)
  grid.arrange( 
        l25g, l50g, l75g, l100g, ## 1,2,3,4
        l25y,p50g25y,p100g25y,
        l50y,p50g50y,p100g50y,
        l75y,p75y100g,
        l100y,p25g100y,p50g100y,p100y75g,p100g100y,
        pxaxis,pyaxis,pxaxis,pyaxis,pxaxis,pyaxis,pxaxis,pyaxis, ## 20-25
        lg, ly, ltrt, lfp, ## 26, 27, 28, 29
#       lcontrol,ltrial,
        widths = c(.3,.4,.2,.2,.05,1,1,1,1,.05),    
        heights = c(.3,.4,1,1,1,1,.3,.2),   
        layout_matrix=rbind(    
          c(NA,NA,NA,NA,NA,26,26,26,26,NA),
            c(NA,NA,NA,NA,NA,1, 2, 3, 4, NA),       
            c(27,5, 28,19,NA,NA,6, NA,7, NA),
            c(27,8, 28,21,NA,NA,9, NA,10,NA),
            c(27,11,28,23,NA,NA,NA,NA,12,NA),
            c(27,13,28,25,NA,14,15,16,17,NA),
            c(NA,NA,NA,NA,NA,18,20,22,24,NA),
            c(NA,NA,NA,NA,NA,29,29,29,29,NA)
            )
        )
dev.off()

  getwd()

Figure S2B sip duration

# plot 100g100y
sd_100g100y <- calc_feeding_time_stats("http://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20vs%20100.00%20+Date.xls?raw=true")
## 
## Ax.dieta At.dieta Lb.dieta Ax.dietb At.dietb Lb.dietb 
##       52       31       17       52       31       17 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 6.9493, df = 5, p-value = 0.2244
sd_100g100y

# plot 75y100g
sd_75y100g <- calc_feeding_time_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g75y%20+Date.xls?raw=true")
## 
## Ax.dieta At.dieta Lb.dieta Ax.dietb At.dietb Lb.dietb 
##       21       24       28       21       24       28 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 5.6811, df = 5, p-value = 0.3385
sd_75y100g

### 50g100y
sd_50g100y <- calc_feeding_time_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2050g100y%20+Date.xls?raw=true")
## 
## Ax.dieta At.dieta Lb.dieta Ax.dietb At.dietb Lb.dietb 
##       45       26       21       45       26       21 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 7.5478, df = 5, p-value = 0.183
sd_50g100y

### 25g100y
sd_25g100y <- calc_feeding_time_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2025g100y%20+Date.xls?raw=true")
## 
## Ax.dieta At.dieta Lb.dieta Ax.dietb At.dietb Lb.dietb 
##       48       24       20       48       24       20 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 15.45, df = 5, p-value = 0.008603
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
sd_25g100y

### 75g100y
sd_100y75g <- calc_feeding_time_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2075g100y%20+Date.xls?raw=true")
## 
## Ax.dieta At.dieta Lb.dieta Ax.dietb At.dietb Lb.dietb 
##       49       28       27       49       28       27 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 5.2413, df = 5, p-value = 0.3871
sd_100y75g

### 100g50y
sd_100g50y <- calc_feeding_time_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g50y%20+Data.xls?raw=true")
## 
## Ax.dieta At.dieta Lb.dieta Ax.dietb At.dietb Lb.dietb 
##       27       18       29       27       18       29 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 19.24, df = 5, p-value = 0.001734
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
sd_100g50y

### 100g25y
sd_100g25y <- calc_feeding_time_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g25y%20+Date.xls?raw=true")
## 
## Ax.dieta At.dieta Lb.dieta Ax.dietb At.dietb Lb.dietb 
##       22       29       29       22       29       29 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 14.993, df = 5, p-value = 0.01039
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
sd_100g25y

### 50g50y
sd_50g50y <- calc_feeding_time_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g100y%20vs%2050g50y%20+Date_JMC.xls?raw=true")
## 
## Ax.dieta At.dieta Lb.dieta Ax.dietb At.dietb Lb.dietb 
##       19       21       25       19       21       25 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 5.7453, df = 5, p-value = 0.3318
sd_50g50y

### 50g25y
sd_50g25y <- calc_feeding_time_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20to%2050.25+Date_JMC.xls?raw=true")
## 
## Ax.dieta At.dieta Lb.dieta Ax.dietb At.dietb Lb.dietb 
##       10        8       19       10        8       19 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 9.1512, df = 5, p-value = 0.1032
sd_50g25y

Build the plot

GET("http://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20vs%20100.00%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))

aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "SipDurations")

ccc <- aaa
out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
j=3
  for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aab[,(i+j)],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,c(1)]) & lp2[,c(1)]!=-1 & !is.na(lp2[,c(3)]) & lp2[,c(3)]!=-1 & !is.na(lp2[,c(4)]) & lp2[,c(4)]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time1=lp3[,3], time2 =lp3[,4],dieta=lp3[,5], dietb = lp3[,6]) %>% dplyr::select(trt,pref,day,time = time1, dieta=time1, dietb=time2) 
    out_df <- rbind(out_df,lp4)
  }
  
  pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    mutate(total = dieta+dietb) %>%
    droplevels() %>%
    reshape2::melt(measure.vars = c("dieta", "dietb"),variable.name = "time_consuming_diet")

  pref_data$trt2 <- factor(pref_data$trt, labels = c("At","Ax","Lb"))
  pref_data$trt2 <- factor(pref_data$trt2, levels = c("Ax","At","Lb"))
  pref_data$tt <- with(pref_data, interaction(trt2, time_consuming_diet))
  
  pd3e <- pref_data %>%
    group_by(tt) %>%
    summarize(mean = mean(value), sem= sd(value)/sqrt(dplyr::n())) %>%
    ungroup()
  
  colors2 <- c("gray","gray","red","red","blue","blue")
  
  pd3e$abc2.x <- plyr::revalue(pd3e$tt, c("Ax.dieta" = "Ax control","At.dieta" = "At control", "Lb.dieta" = "Lb control","Ax.dietb" = "Ax trial", "At.dietb" = "At trial", "Lb.dietb" = "Lb trial"))
  pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax control","Ax trial","At control", "At trial", "Lb control", "Lb trial"))

pyaxis <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
  geom_blank() + 
  theme_cowplot() + 
  ylim(c(0,0.3)) + 
  theme(axis.title = element_blank(), axis.line.x =element_blank(), axis.ticks.x=element_blank(), axis.text.x= element_blank(),axis.ticks.y = element_line(size = 0.2), axis.line.y = element_line(size = 0.2), axis.text.y = element_text(size = 6))

pxaxis <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
  geom_blank() + 
  theme_cowplot() + 
  ylim(c(0,.3)) + 
    theme(axis.title = element_blank(), axis.line.y =element_blank(), axis.ticks.y=element_blank(), axis.text.y= element_blank(),axis.ticks.x = element_line(size = 0.2), axis.line.x = element_line(size = 0.2), axis.text.x = element_text(angle = 45, hjust = 1, size = 6))

label_size = 4
l100g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "100g"), size = label_size)
l75g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "75g"), size = label_size)
l50g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "50g"), size = label_size)
l25g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "25g"), size = label_size)
l100y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "100g"), size = label_size)
l75y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "75g"), size = label_size)
l50y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "50g"), size = label_size)
l25y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "25g"), size = label_size)
ly <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Yeast"), size = label_size*1.25, angle = 90)
lg <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Glucose"), size = label_size*1.25)
ltrt <- ggplot() + theme_void() + geom_text(aes(0,0,label = "sip duration (s)"), size = label_size*.75, angle = 90)
lfp <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Bacterial treatment and diet choice"), size = label_size*.75)
lcontrol <- ggplot() + theme_void() + geom_text(aes(0,0,label = ""), size = label_size*.75, angle = 90, color = "green")
ltrial <- ggplot() + theme_void() + geom_text(aes(0,0,label = ""), size = label_size*.75, angle = 90, color = "green")

jpeg(h=4.5, w=6, "plot_sip_durations_addfilter.jpg", units = "in", res = 300)
  grid.arrange( 
        l25g, l50g, l75g, l100g, ## 1,2,3,4
        l25y,sd_50g25y,sd_100g25y,
        l50y,sd_50g50y,sd_100g50y,
        l75y,sd_75y100g,
        l100y,sd_25g100y,sd_50g100y,sd_100y75g,sd_100g100y,
        pxaxis,pyaxis,pxaxis,pyaxis,pxaxis,pyaxis,pxaxis,pyaxis, ## 20-25
        lg, ly, ltrt, lfp, ## 26, 27, 28, 29
#       lcontrol,ltrial,
        widths = c(.3,.4,.2,.2,.05,1,1,1,1,.05),    
        heights = c(.3,.4,1,1,1,1,.3,.2),   
        layout_matrix=rbind(    
          c(NA,NA,NA,NA,NA,26,26,26,26,NA),
            c(NA,NA,NA,NA,NA,1, 2, 3, 4, NA),       
            c(27,5, 28,19,NA,NA,6, NA,7, NA),
            c(27,8, 28,21,NA,NA,9, NA,10,NA),
            c(27,11,28,23,NA,NA,NA,NA,12,NA),
            c(27,13,28,25,NA,14,15,16,17,NA),
            c(NA,NA,NA,NA,NA,18,20,22,24,NA),
            c(NA,NA,NA,NA,NA,29,29,29,29,NA)
            )
        )
dev.off()

Figure S2C sip distribution

# plot 100g100y
sd_100g100y <- calc_sip_stats("http://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20vs%20100.00%20+Date.xls?raw=true")
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       55       31       19       55       31       19 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 3.9749, df = 5, p-value = 0.553
sd_100g100y

# plot 75y100g
sd_75y100g <- calc_sip_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g75y%20+Date.xls?raw=true")
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       21       27       29       21       27       29 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 8.3727, df = 5, p-value = 0.1369
sd_75y100g

### 50g100y
sd_50g100y <- calc_sip_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2050g100y%20+Date.xls?raw=true")
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       57       27       26       57       27       26 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 9.3459, df = 5, p-value = 0.09604
sd_50g100y

### 25g100y
sd_25g100y <- calc_sip_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2025g100y%20+Date.xls?raw=true")
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       57       28       22       57       28       22 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 44.068, df = 5, p-value = 2.244e-08
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
sd_25g100y

### 75g100y
sd_100y75g <- calc_sip_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2075g100y%20+Date.xls?raw=true")
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       52       29       28       52       29       28 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 16.759, df = 5, p-value = 0.00498
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
sd_100y75g

### 100g50y
sd_100g50y <- calc_sip_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g50y%20+Data.xls?raw=true")
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       27       19       30       27       19       30 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 32.318, df = 5, p-value = 5.14e-06
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
sd_100g50y

### 100g25y
sd_100g25y <- calc_sip_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g25y%20+Date.xls?raw=true")
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       22       29       29       22       29       29 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 8.0435, df = 5, p-value = 0.1539
sd_100g25y

### 50g50y
sd_50g50y <- calc_sip_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g100y%20vs%2050g50y%20+Date_JMC.xls?raw=true")
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       19       21       25       19       21       25 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 4.4111, df = 5, p-value = 0.4919
sd_50g50y

### 50g25y
sd_50g25y <- calc_sip_stats("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20to%2050.25+Date_JMC.xls?raw=true")
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       10        8       19       10        8       19 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 2.6602, df = 5, p-value = 0.7522
sd_50g25y

Build the plot

GET("http://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20vs%20100.00%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))

aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "SipDurations")

ccc <- aaa
out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())

  for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aab[,(i+j)],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,c(1)]) & lp2[,c(1)]!=-1 & !is.na(lp2[,c(3)]) & lp2[,c(3)]!=-1 & !is.na(lp2[,c(4)]) & lp2[,c(4)]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time1=lp3[,3], time2 =lp3[,4],dieta=lp3[,5], dietb = lp3[,6]) %>% dplyr::select(trt,pref,day,time = time1, dieta=time1, dietb=time2) 
    out_df <- rbind(out_df,lp4)
  }
  
  pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    mutate(total = dieta+dietb) %>%
    droplevels() %>%
    reshape2::melt(measure.vars = c("dieta", "dietb"),variable.name = "time_consuming_diet")

  pref_data$trt2 <- factor(pref_data$trt, labels = c("At","Ax","Lb"))
  pref_data$trt2 <- factor(pref_data$trt2, levels = c("Ax","At","Lb"))
  pref_data$tt <- with(pref_data, interaction(trt2, time_consuming_diet))
  
  pd3e <- pref_data %>%
    group_by(tt) %>%
    summarize(mean = mean(value), sem= sd(value)/sqrt(dplyr::n())) %>%
    ungroup()
  
  colors2 <- c("gray","gray","red","red","blue","blue")
  
  pd3e$abc2.x <- plyr::revalue(pd3e$tt, c("Ax.dieta" = "Ax control","At.dieta" = "At control", "Lb.dieta" = "Lb control","Ax.dietb" = "Ax trial", "At.dietb" = "At trial", "Lb.dietb" = "Lb trial"))
  pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax control","Ax trial","At control", "At trial", "Lb control", "Lb trial"))

pyaxis <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
  geom_blank() + 
  theme_cowplot() + 
  ylim(c(0,550)) + 
  theme(axis.title = element_blank(), axis.line.x =element_blank(), axis.ticks.x=element_blank(), axis.text.x= element_blank(),axis.ticks.y = element_line(size = 0.2), axis.line.y = element_line(size = 0.2), axis.text.y = element_text(size = 6))

pxaxis <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
  geom_blank() + 
  theme_cowplot() + 
  ylim(c(0,550)) + 
    theme(axis.title = element_blank(), axis.line.y =element_blank(), axis.ticks.y=element_blank(), axis.text.y= element_blank(),axis.ticks.x = element_line(size = 0.2), axis.line.x = element_line(size = 0.2), axis.text.x = element_text(angle = 45, hjust = 1, size = 6))

label_size = 4
l100g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "100g"), size = label_size)
l75g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "75g"), size = label_size)
l50g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "50g"), size = label_size)
l25g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "25g"), size = label_size)
l100y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "100g"), size = label_size)
l75y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "75g"), size = label_size)
l50y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "50g"), size = label_size)
l25y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "25g"), size = label_size)
ly <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Yeast"), size = label_size*1.25, angle = 90)
lg <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Glucose"), size = label_size*1.25)
ltrt <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Total number of sips"), size = label_size*.75, angle = 90)
lfp <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Bacterial treatment and diet choice"), size = label_size*.75)
lcontrol <- ggplot() + theme_void() + geom_text(aes(0,0,label = ""), size = label_size*.75, angle = 90, color = "green")
ltrial <- ggplot() + theme_void() + geom_text(aes(0,0,label = ""), size = label_size*.75, angle = 90, color = "green")

jpeg(h=4.5, w=6, "plot_sip_distribution_addfilter.jpg", units = "in", res = 300)
  grid.arrange( 
        l25g, l50g, l75g, l100g, ## 1,2,3,4
        l25y,sd_50g25y,sd_100g25y,
        l50y,sd_50g50y,sd_100g50y,
        l75y,sd_75y100g,
        l100y,sd_25g100y,sd_50g100y,sd_100y75g,sd_100g100y,
        pxaxis,pyaxis,pxaxis,pyaxis,pxaxis,pyaxis,pxaxis,pyaxis, ## 20-25
        lg, ly, ltrt, lfp, ## 26, 27, 28, 29
#       lcontrol,ltrial,
        widths = c(.3,.4,.2,.2,.05,1,1,1,1,.05),    
        heights = c(.3,.4,1,1,1,1,.3,.2),   
        layout_matrix=rbind(    
          c(NA,NA,NA,NA,NA,26,26,26,26,NA),
            c(NA,NA,NA,NA,NA,1, 2, 3, 4, NA),       
            c(27,5, 28,19,NA,NA,6, NA,7, NA),
            c(27,8, 28,21,NA,NA,9, NA,10,NA),
            c(27,11,28,23,NA,NA,NA,NA,12,NA),
            c(27,13,28,25,NA,14,15,16,17,NA),
            c(NA,NA,NA,NA,NA,18,20,22,24,NA),
            c(NA,NA,NA,NA,NA,29,29,29,29,NA)
            )
        )
dev.off()

Figure S2D sip distribution

# plot 100g100y
calc_sip_stats_axenic("http://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20vs%20100.00%20+Date.xls?raw=true", fileadd = "100g100y", append_val = F)
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       55       31       19       55       31       19 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 3.9749, df = 5, p-value = 0.553
calc_sip_stats_axenic("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g75y%20+Date.xls?raw=true", fileadd = "sd_75y100g", append_val = T)
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       21       27       29       21       27       29 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 8.3727, df = 5, p-value = 0.1369
calc_sip_stats_axenic("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2050g100y%20+Date.xls?raw=true", fileadd = "sd_50g100y", append_val = T)
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       57       27       26       57       27       26 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 9.3459, df = 5, p-value = 0.09604
calc_sip_stats_axenic("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2025g100y%20+Date.xls?raw=true", fileadd = "sd_25g100y", append_val = T)
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       57       28       22       57       28       22 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 44.068, df = 5, p-value = 2.244e-08
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
calc_sip_stats_axenic("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2075g100y%20+Date.xls?raw=true", fileadd = "sd_100y75g", append_val = T)
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       52       29       28       52       29       28 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 16.759, df = 5, p-value = 0.00498
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
calc_sip_stats_axenic("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g50y%20+Data.xls?raw=true", fileadd = "sd_100g50y", append_val = T)
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       27       19       30       27       19       30 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 32.318, df = 5, p-value = 5.14e-06
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
calc_sip_stats_axenic("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g25y%20+Date.xls?raw=true", fileadd = "sd_100g25y", append_val = T)
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       22       29       29       22       29       29 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 8.0435, df = 5, p-value = 0.1539
calc_sip_stats_axenic("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g100y%20vs%2050g50y%20+Date_JMC.xls?raw=true", fileadd = "sd_50g50y", append_val = T)
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       19       21       25       19       21       25 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 4.4111, df = 5, p-value = 0.4919
calc_sip_stats_axenic("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20to%2050.25+Date_JMC.xls?raw=true", fileadd = "sd_50g25y", append_val = T)
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       10        8       19       10        8       19 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 2.6602, df = 5, p-value = 0.7522
# plot 100g100y
sd_100g100y <- calc_sip_stats_axenic_plots("http://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20vs%20100.00%20+Date.xls?raw=true", sigvals = c(""))
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       55       31       19       55       31       19 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 3.9749, df = 5, p-value = 0.553
sd_100g100y

# plot 75y100g
sd_75y100g <- calc_sip_stats_axenic_plots("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g75y%20+Date.xls?raw=true", sigvals = c(""))
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       21       27       29       21       27       29 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 8.3727, df = 5, p-value = 0.1369
sd_75y100g

### 50g100y
sd_50g100y <- calc_sip_stats_axenic_plots("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2050g100y%20+Date.xls?raw=true", sigvals = c(""))
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       57       27       26       57       27       26 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 9.3459, df = 5, p-value = 0.09604
sd_50g100y

### 25g100y
sd_25g100y <- calc_sip_stats_axenic_plots("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2025g100y%20+Date.xls?raw=true", sigvals = c("","","*","","","*"))
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       57       28       22       57       28       22 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 44.068, df = 5, p-value = 2.244e-08
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
sd_25g100y

### 75g100y
sd_100y75g <- calc_sip_stats_axenic_plots("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%2075g100y%20+Date.xls?raw=true", sigvals = c("","*","*","","",""))
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       52       29       28       52       29       28 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 16.759, df = 5, p-value = 0.00498
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
sd_100y75g

### 100g50y
sd_100g50y <- calc_sip_stats_axenic_plots("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g50y%20+Data.xls?raw=true", sigvals = c("","", "*", "", "*", "*"))
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       27       19       30       27       19       30 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 32.318, df = 5, p-value = 5.14e-06
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
sd_100g50y

### 100g25y
sd_100g25y <- calc_sip_stats_axenic_plots("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g25y%20+Date.xls?raw=true", sigvals = c(""))
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       22       29       29       22       29       29 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 8.0435, df = 5, p-value = 0.1539
sd_100g25y

### 50g50y
sd_50g50y <- calc_sip_stats_axenic_plots("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100g100y%20vs%2050g50y%20+Date_JMC.xls?raw=true", sigvals = c(""))
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       19       21       25       19       21       25 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 4.4111, df = 5, p-value = 0.4919
sd_50g50y

### 50g25y
sd_50g25y <- calc_sip_stats_axenic_plots("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20to%2050.25+Date_JMC.xls?raw=true", sigvals = c(""))
## 
## Ax.sipsA At.sipsA Lb.sipsA Ax.sipsB At.sipsB Lb.sipsB 
##       10        8       19       10        8       19 
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 2.6602, df = 5, p-value = 0.7522
sd_50g25y

Build the plot

GET("http://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat%20100.100%20vs%20100.00%20+Date.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))

aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "Date")
aab <- read_xls(tf, sheet = "SipDurations")

ccc <- aaa
out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())

  for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aab[,(i+j)],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,c(1)]) & lp2[,c(1)]!=-1 & !is.na(lp2[,c(3)]) & lp2[,c(3)]!=-1 & !is.na(lp2[,c(4)]) & lp2[,c(4)]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time1=lp3[,3], time2 =lp3[,4],dieta=lp3[,5], dietb = lp3[,6]) %>% dplyr::select(trt,pref,day,time = time1, dieta=time1, dietb=time2) 
    out_df <- rbind(out_df,lp4)
  }
  
  pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    mutate(total = dieta+dietb) %>%
    droplevels() %>%
    reshape2::melt(measure.vars = c("dieta", "dietb"),variable.name = "time_consuming_diet")

  pref_data$trt2 <- factor(pref_data$trt, labels = c("At","Ax","Lb"))
  pref_data$trt2 <- factor(pref_data$trt2, levels = c("Ax","At","Lb"))
  pref_data$tt <- with(pref_data, interaction(trt2, time_consuming_diet))
  
  pd3e <- pref_data %>%
    group_by(tt) %>%
    summarize(mean = mean(value), sem= sd(value)/sqrt(dplyr::n())) %>%
    ungroup()
  
  colors2 <- c("gray","gray","red","red","blue","blue")
  
  pd3e$abc2.x <- plyr::revalue(pd3e$tt, c("Ax.dieta" = "Ax control","At.dieta" = "At control", "Lb.dieta" = "Lb control","Ax.dietb" = "Ax trial", "At.dietb" = "At trial", "Lb.dietb" = "Lb trial"))
  pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("Ax control","Ax trial","At control", "At trial", "Lb control", "Lb trial"))

pyaxis <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
  geom_blank() + 
  theme_cowplot() + 
  ylim(c(0,550)) + 
  theme(axis.title = element_blank(), axis.line.x =element_blank(), axis.ticks.x=element_blank(), axis.text.x= element_blank(),axis.ticks.y = element_line(size = 0.2), axis.line.y = element_line(size = 0.2), axis.text.y = element_text(size = 6))

pxaxis <- ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(abc2.x))))), yend=mean, col = abc2.x)) + 
  geom_blank() + 
  theme_cowplot() + 
  ylim(c(0,550)) + 
    theme(axis.title = element_blank(), axis.line.y =element_blank(), axis.ticks.y=element_blank(), axis.text.y= element_blank(),axis.ticks.x = element_line(size = 0.2), axis.line.x = element_line(size = 0.2), axis.text.x = element_text(angle = 45, hjust = 1, size = 6))

label_size = 4
l100g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "100g"), size = label_size)
l75g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "75g"), size = label_size)
l50g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "50g"), size = label_size)
l25g <- ggplot() + theme_void() + geom_text(aes(0,0,label = "25g"), size = label_size)
l100y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "100g"), size = label_size)
l75y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "75g"), size = label_size)
l50y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "50g"), size = label_size)
l25y <- ggplot() + theme_void() + geom_text(aes(0,0,label = "25g"), size = label_size)
ly <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Yeast"), size = label_size*1.25, angle = 90)
lg <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Glucose"), size = label_size*1.25)
ltrt <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Total number of sips"), size = label_size*.75, angle = 90)
lfp <- ggplot() + theme_void() + geom_text(aes(0,0,label = "Bacterial treatment and diet choice"), size = label_size*.75)
lcontrol <- ggplot() + theme_void() + geom_text(aes(0,0,label = ""), size = label_size*.75, angle = 90, color = "green")
ltrial <- ggplot() + theme_void() + geom_text(aes(0,0,label = ""), size = label_size*.75, angle = 90, color = "green")

jpeg(h=4.5, w=6, "plot_sip_distribution_vAX_addfilter.jpg", units = "in", res = 300)
  grid.arrange( 
        l25g, l50g, l75g, l100g, ## 1,2,3,4
        l25y,sd_50g25y,sd_100g25y,
        l50y,sd_50g50y,sd_100g50y,
        l75y,sd_75y100g,
        l100y,sd_25g100y,sd_50g100y,sd_100y75g,sd_100g100y,
        pxaxis,pyaxis,pxaxis,pyaxis,pxaxis,pyaxis,pxaxis,pyaxis, ## 20-25
        lg, ly, ltrt, lfp, ## 26, 27, 28, 29
#       lcontrol,ltrial,
        widths = c(.3,.4,.2,.2,.05,1,1,1,1,.05),    
        heights = c(.3,.4,1,1,1,1,.3,.2),   
        layout_matrix=rbind(    
          c(NA,NA,NA,NA,NA,26,26,26,26,NA),
            c(NA,NA,NA,NA,NA,1, 2, 3, 4, NA),       
            c(27,5, 28,19,NA,NA,6, NA,7, NA),
            c(27,8, 28,21,NA,NA,9, NA,10,NA),
            c(27,11,28,23,NA,NA,NA,NA,12,NA),
            c(27,13,28,25,NA,14,15,16,17,NA),
            c(NA,NA,NA,NA,NA,18,20,22,24,NA),
            c(NA,NA,NA,NA,NA,29,29,29,29,NA)
            )
        )
dev.off()

Figure 2

Figure 2

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 347 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172640ee1b9.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat-ByDay.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat-ByDay.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 372 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172d056c01.xls
abb <- read_xls(tf, sheet = "Per Day")

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat-ByDay-ByTime.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat-ByDay-ByTime.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 389 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1725ef5bc54.xls
aab <- read_xls(tf, sheet = "By Time")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
i=1
for(i in 1:41) {
    lacto_pref <- (ccc[,i]/(ccc[,(41+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(41+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% 
      mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+41])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% 
      dplyr::select(trt,pref,day,time,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 100g100y", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 100g50y", replacement = "",x = trt,perl = T)) %>%
    filter(!trt%in%c("disregard-111_disregard-111","disregard-68_disregard-68")) %>%
    mutate(trt=factor(trt)) %>%
    filter(!trt%in%c("axenic_axenic","67_67","25_25","48_48","32_32")) %>% 
    droplevels()

#write.csv(pref_data, "pref_data.csv")

pd2 <- read.csv(url("https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/lookup_code.csv")) %>% inner_join(pref_data, by=c("trt")) %>% filter(!is.na(time)) %>% droplevels()

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$time2 <- as.factor(as.character(pd2$time))
pd2$code <- as.factor(as.character(pd2$code))
pd2$td <- paste0(pd2$time,pd2$day)

abc <- glmer(cbind(dieta,dietb) ~ trt + (1|day2) + (1|time2), data=pd2, family="binomial")
cat("Statistics summary")
## Statistics summary
summary(abc)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(dieta, dietb) ~ trt + (1 | day2) + (1 | time2)
##    Data: pd2
## 
##      AIC      BIC   logLik deviance df.resid 
##  25390.6  25530.1 -12661.3  25322.6      413 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -18.1226  -4.6065  -0.6294   4.1556  23.7092 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  day2   (Intercept) 0.06125  0.2475  
##  time2  (Intercept) 0.04545  0.2132  
## Number of obs: 447, groups:  day2, 6; time2, 3
## 
## Fixed effects:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.708017   0.166005  -4.265 2.00e-05 ***
## trt11cb6_11cb6 -0.869542   0.065859 -13.203  < 2e-16 ***
## trt11cb7_11cb7  0.711423   0.072644   9.793  < 2e-16 ***
## trt11ct2_11ct2  0.783708   0.059086  13.264  < 2e-16 ***
## trt13_13       -0.621678   0.068865  -9.028  < 2e-16 ***
## trt137_137     -0.008885   0.103645  -0.086  0.93169    
## trt15_15       -0.152676   0.071725  -2.129  0.03328 *  
## trt16_16       -0.268730   0.067874  -3.959 7.52e-05 ***
## trt181_181      0.434242   0.074895   5.798 6.71e-09 ***
## trt196_196      0.179261   0.067527   2.655  0.00794 ** 
## trt1ab7_1ab7    0.509851   0.067504   7.553 4.26e-14 ***
## trt28_28        0.575971   0.061722   9.332  < 2e-16 ***
## trt29_29       -0.020965   0.072825  -0.288  0.77344    
## trt3_3          1.165184   0.059697  19.518  < 2e-16 ***
## trt30_30       -0.314009   0.065585  -4.788 1.69e-06 ***
## trt31_31        1.189718   0.057141  20.821  < 2e-16 ***
## trt34_34       -0.135258   0.076837  -1.760  0.07835 .  
## trt35_35        0.292489   0.064968   4.502 6.73e-06 ***
## trt39_39        0.064173   0.060739   1.057  0.29072    
## trt4_4          0.724923   0.066967  10.825  < 2e-16 ***
## trt43_43        0.291211   0.060632   4.803 1.56e-06 ***
## trt45_45        1.076969   0.058656  18.361  < 2e-16 ***
## trt5_5         -0.058369   0.061599  -0.948  0.34336    
## trt52_52        0.529634   0.058548   9.046  < 2e-16 ***
## trt56_56        0.283215   0.064559   4.387 1.15e-05 ***
## trt6_6          1.138412   0.064674  17.602  < 2e-16 ***
## trt66_66        1.115823   0.065113  17.137  < 2e-16 ***
## trt69_69       -0.090986   0.070060  -1.299  0.19405    
## trt70_70        0.697557   0.072816   9.580  < 2e-16 ***
## trt73_73        0.664535   0.062588  10.618  < 2e-16 ***
## trt75_75       -0.477031   0.068824  -6.931 4.17e-12 ***
## trt98_98        0.425035   0.059072   7.195 6.23e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
def <- summary(abc)
  # Runs the stats, pounded out to prevent timeout
# abc1 <- glht(abc, mcp(trt='Tukey'))
# abc2 <- cld(abc1)
# efg <- data.frame(cbind(abc2$lp, abc2$x,abc2$xname ))
#pd3e <- data.frame(abc2$lp, abc2$x) %>% group_by(abc2.x) %>% dplyr::summarize(mean = mean(abc2.lp), sem=sd(abc2.lp)/sqrt(sum(abc2.lp<2)))
#write.csv(pd3e, 'fig2-pd3e.csv')
#letters_orderA <- data.frame(abc2$mcletters$Letters) %>% tibble::rownames_to_column('name')
#write.csv(letters_orderA, "letters_orderA.csv")

efg <- read.csv(url('https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/fig2-efg.csv'))
fgh <- efg %>% group_by(X2) %>% dplyr::summarize(mean = mean(X1), sem = sd(X1)/sqrt(sum(X1>-1)))
pd3e <- read.csv(url('https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/fig2-pd3e.csv'))
letters_orderA <- read.csv(url("https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/letters_orderA.csv"))

colors2 <- c("gray")

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("1_1","11cb6_11cb6","11cb7_11cb7","11ct2_11ct2","13_13","137_137","15_15","16_16","181_181","196_196","1ab7_1ab7","28_28","29_29","3_3","30_30","31_31","34_34","35_35","39_39","4_4","43_43","45_45","5_5","52_52","56_56","6_6","66_66","69_69","70_70","73_73","75_75","98_98"))

efg2 <- efg %>% mutate(X2 = as.character(X2)) %>% full_join(read.csv(url("https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/lookup_code.csv")), by = c("X2" = "singlecode"))

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("1_1" = "lbrc",
                                            "11cb6_11cb6" = "wp07",
                                            "11cb7_11cb7" = "wp13",
                                            "11ct2_11ct2"="asl5",
                                            "13_13"="bsub", 
                                            "137_137" = "lc37", 
                                            "15_15" = "ecok", 
                                            "16_16" = "pput", 
                                            "181_181" = "lpar", 
                                            "196_196" = "llcs",
                                            "1ab7_1ab7" = "wp18", 
                                            "28_28" = "atrn",
                                            "29_29" = "gfra", 
                                            "3_3" = "lfrc",
                                            "30_30" = "apan", 
                                            "31_31" = "kmed",
                                            "34_34" = "apnb",
                                            "35_35" = "aace",
                                            "39_39" = "apa3",
                                            "4_4" = "apoc",
                                            "43_43" = "aci5",
                                            "45_45" = "aori",
                                            "5_5" = "atrc",
                                            "52_52" = "cint",
                                            "56_56" = "galb",
                                            "6_6" = "amac",
                                            "66_66" = "lmli",
                                            "69_69" = "lfal",
                                            "70_70" = "lfrk",
                                            "73_73" = "lbuc",
                                            "75_75" = "lplw",
                                            "98_98" = "lsui")
                             )
                             
pd3e$abc2.x <- factor(pd3e$abc2.x, levels = rev(c("atrn","atrc","amac","aori","aci5","aace","apan","apa3","asl5","apnb","apoc","gfra","galb","kmed","cint","ecok","pput","lmli","lplw","lpar","lbrc","lbuc","lfrk","lfrc","llcs","wp07","wp13","wp18","lfal","lsui","lc37","bsub")))

pd3e <- pd3e %>% arrange(abc2.x)

letters_orderA$name2 <- plyr::revalue(letters_orderA$name, c("1_1" = "lbrc",
                                                             "11cb6_11cb6" = "wp07", 
                                                             "11cb7_11cb7" = "wp13",
                                                             "11ct2_11ct2"="asl5", 
                                                             "13_13"="bsub", 
                                                             "137_137" = "lc37", 
                                                             "15_15" = "ecok", 
                                                             "16_16" = "pput", 
                                                             "181_181" = "lpar", 
                                                             "196_196" = "llcs",
                                                             "1ab7_1ab7" = "wp18", 
                                                             "28_28" = "atrn",
                                                             "29_29" = "gfra", 
                                                             "3_3" = "lfrc",
                                                             "30_30" = "apan", 
                                                             "31_31" = "kmed",
                                                             "34_34" = "apnb",
                                                             "35_35" = "aace",
                                                             "39_39" = "apa3",
                                                             "4_4" = "apoc",
                                                             "43_43" = "aci5",
                                                             "45_45" = "aori",
                                                             "5_5" = "atrc",
                                                             "52_52" = "cint",
                                                             "56_56" = "galb",
                                                             "6_6" = "amac",
                                                             "66_66" = "lmli",
                                                             "69_69" = "lfal",
                                                             "70_70" = "lfrk",
                                                             "73_73" = "lbuc",
                                                             "75_75" = "lplw",
                                                             "98_98" = "lsui"
                                                             )
                                      )
letters_orderA$name2 <- factor(letters_orderA$name2, levels = rev(c("atrn","atrc","amac","aori","aci5","aace","apan","apa3","asl5","apnb","apoc","gfra","galb","kmed","cint","ecok","pput","lmli","lplw","lpar","lbrc","lbuc","lfrk","lfrc","llcs","wp07","wp13","wp18","lfal","lsui","lc37","bsub")))

letters_order <- letters_orderA %>% arrange(name2) %>% dplyr::select(abc2.mcletters.Letters) %>% unlist() %>% unname()

colors2 <- rev(c(rep("red",11),rep("orange",4),rep("green",2),rep("blue", 7),rep("purple",7),rep("purple4",1)))

    ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(droplevels(pd3e$abc2.x)))))), yend=mean)) + 
        geom_segment(stat="identity",size=12, col=colors2) + 
        scale_fill_manual(name="Legend",values=colors2) +
        scale_y_continuous(limits = c(0,1))+
        theme_cowplot() + 
        theme(axis.text.x = element_text(angle = 45, hjust=1), axis.text.y = element_text(hjust=0)) + # text = element_text(size=32), axis.text = element_text(size=32), 
        geom_errorbar(aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=2) +
        labs(x="",y = "Preference index") + 
      geom_text(x= 0, y= 0.02, label = "5%Y 10%G",color="green1",  hjust = -.1, angle = 90) +# size = 12
      geom_text(x= 0, y= 1, label = "10%Y 10%G",color="blue", hjust =-.1, angle = 90) + #size = 12
        geom_hline(yintercept = 0.5, linetype="dashed") +
        geom_text(aes(label=unlist(unname(letters_order)), y=.75)) + # , size=9
      coord_flip()

    #ggsave(h=16,w=7,"MGWA_data.png")

Figure S3A total feeding

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 347 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1724cb6f92f.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat-ByDay.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat-ByDay.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 372 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172a1a402.xls
abb <- read_xls(tf, sheet = "Per Day")

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat-ByDay-ByTime.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat-ByDay-ByTime.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 389 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea17274167fa0.xls
aab <- read_xls(tf, sheet = "By Time")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
i=1
for(i in 1:41) {
    lacto_pref <- (ccc[,i]/(ccc[,(41+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(41+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% 
      mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+41])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% 
      dplyr::select(trt,pref,day,time,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pref_data <- out_df %>% 
    mutate(trt=gsub(pattern = " 100g100y", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 100g50y", replacement = "",x = trt,perl = T)) %>%
    filter(!trt%in%c("disregard-111_disregard-111","disregard-68_disregard-68")) %>%
    mutate(trt=factor(trt)) %>%
    filter(!trt%in%c("axenic_axenic","67_67","25_25","48_48","32_32")) %>% 
  mutate(total = dieta + dietb) %>%
    droplevels()

#write.csv(pref_data, "pref_data.csv")

pd2 <- read.csv(url("https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/lookup_code.csv")) %>% inner_join(pref_data, by=c("trt")) %>% filter(!is.na(time)) %>% droplevels()

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$time2 <- as.factor(as.character(pd2$time))
pd2$code <- as.factor(as.character(pd2$code))
pd2$td <- paste0(pd2$time,pd2$day)
pd2$trt <- as.factor(as.character(pd2$trt))

cat("KW test results")
## KW test results
kruskal.test(pd2$total ~ pd2$trt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pd2$total by pd2$trt
## Kruskal-Wallis chi-squared = 41.755, df = 31, p-value = 0.09402
cat("N per group")
## N per group
table(pd2$trt)
## 
##         1_1 11cb6_11cb6 11cb7_11cb7 11ct2_11ct2       13_13     137_137 
##          12          12          13          16           8           9 
##       15_15       16_16     181_181     196_196   1ab7_1ab7       28_28 
##          16          13          14          10          13          11 
##       29_29         3_3       30_30       31_31       34_34       35_35 
##          11          12          14          17          13          13 
##       39_39         4_4       43_43       45_45         5_5       52_52 
##          15          12          21          17          17          22 
##       56_56         6_6       66_66       69_69       70_70       73_73 
##          13          13          16          16          14          17 
##       75_75       98_98 
##           9          18
pd3e <- pd2 %>%
  group_by(trt) %>%
  summarize(mean = mean(total), sem= sd(total)/sqrt(dplyr::n())) %>%
  ungroup()

pd3e$abc2.x <- factor(pd3e$trt, levels = c("1_1","11cb6_11cb6","11cb7_11cb7","11ct2_11ct2","13_13","137_137","15_15","16_16","181_181","196_196","1ab7_1ab7","28_28","29_29","3_3","30_30","31_31","34_34","35_35","39_39","4_4","43_43","45_45","5_5","52_52","56_56","6_6","66_66","69_69","70_70","73_73","75_75","98_98"))

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("1_1" = "lbrc",
                                            "11cb6_11cb6" = "wp07",
                                            "11cb7_11cb7" = "wp13",
                                            "11ct2_11ct2"="asl5",
                                            "13_13"="bsub", 
                                            "137_137" = "lc37", 
                                            "15_15" = "ecok", 
                                            "16_16" = "pput", 
                                            "181_181" = "lpar", 
                                            "196_196" = "llcs",
                                            "1ab7_1ab7" = "wp18", 
                                            "28_28" = "atrn",
                                            "29_29" = "gfra", 
                                            "3_3" = "lfrc",
                                            "30_30" = "apan", 
                                            "31_31" = "kmed",
                                            "34_34" = "apnb",
                                            "35_35" = "aace",
                                            "39_39" = "apa3",
                                            "4_4" = "apoc",
                                            "43_43" = "aci5",
                                            "45_45" = "aori",
                                            "5_5" = "atrc",
                                            "52_52" = "cint",
                                            "56_56" = "galb",
                                            "6_6" = "amac",
                                            "66_66" = "lmli",
                                            "69_69" = "lfal",
                                            "70_70" = "lfrk",
                                            "73_73" = "lbuc",
                                            "75_75" = "lplw",
                                            "98_98" = "lsui")
                             )
                             
pd3e$abc2.x <- factor(pd3e$abc2.x, levels = rev(c("atrn","atrc","amac","aori","aci5","aace","apan","apa3","asl5","apnb","apoc","gfra","galb","kmed","cint","ecok","pput","lmli","lplw","lpar","lbrc","lbuc","lfrk","lfrc","llcs","wp07","wp13","wp18","lfal","lsui","lc37","bsub")))

pd3e <- pd3e %>% arrange(abc2.x)

colors2 <- rev(c(rep("red",11),rep("orange",4),rep("green",2),rep("blue", 7),rep("purple",7),rep("purple4",1)))


    ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(droplevels(pd3e$abc2.x)))))), yend=mean)) + 
        geom_segment(stat="identity",size=12, col=colors2) + 
        scale_fill_manual(name="Legend",values=colors2) +
        scale_y_continuous(limits = c(0,400))+
        theme_cowplot() + 
        theme(axis.text.x = element_text(angle = 45, hjust=1), axis.text.y = element_text(hjust=0), text = element_text(size=32), axis.text = element_text(size=32)) +
        geom_errorbar(aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=2) +
        labs(x="bacterial strain (code)",y = "# of sips") + 
#     geom_text(x= 0, y= 0.02, label = "5%Y 10%G",color="green1",  hjust = -.1, angle = 90) +# size = 12
#     geom_text(x= 0, y= 1, label = "10%Y 10%G",color="blue", hjust =-.1, angle = 90) + #size = 12
#       geom_hline(yintercept = 0.5, linetype="dashed") 
#       geom_text(aes(label=unlist(unname(letters_order)), y=.75)) + # , size=9
      coord_flip()

# ggsave(h=16,w=7,"MGWA_totalfeed.png")

Figure S3B total sip duration

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 347 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172211d80a6.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
aab <- read_xls(tf, sheet = "SipDurations")

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat-ByDay.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat-ByDay.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 372 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea17217e92346.xls
abb <- read_xls(tf, sheet = "Per Day")

ccc <- aaa

j=41

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())

 for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aab[,(i+j)],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,c(1)]) & lp2[,c(1)]!=-1 & !is.na(lp2[,c(3)]) & lp2[,c(3)]!=-1 & !is.na(lp2[,c(4)]) & lp2[,c(4)]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time1=lp3[,3], time2 =lp3[,4],dieta=lp3[,5], dietb = lp3[,6], ) %>% dplyr::select(trt,pref,day,time = time1, sip1=dieta, sip2 = dietb, dieta=time1, dietb=time2) 
    out_df <- rbind(out_df,lp4)
  }
  
  pref_data <- out_df %>% 
    filter(sip1<1000 & sip2<1000) %>%
    filter(!trt%in%c("disregard-111 100g100y_disregard-111 100g50y","disregard-68 100g100y_disregard-68 100g50y","67 100g100y_67 100g50y","25 100g100y_25 100g50y","48 100g100y_48 100g50y","32 100g100y_32 100g50y","axenic 100g100y_axenic 100g50y","103 100g100y_103 100g50y","71 100g100y_71 100g50y")) %>%
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
  # filter(dieta<200 & dietb<200) %>%
    mutate(total = dieta+dietb) %>%
    droplevels() %>%
    reshape2::melt(measure.vars = c("dieta", "dietb"),variable.name = "time_consuming_diet")

    pref_data$trt2 <- plyr::revalue(pref_data$trt, c("1 100g100y_1 100g50y" = "lbrc",
                                            "11cb6 100g100y_11cb6 100g50y" = "wp07",
                                            "11cb7 100g100y_11cb7 100g50y" = "wp13",
                                            "11ct2 100g100y_11ct2 100g50y"="asl5",
                                            "13 100g100y_13 100g50y"="bsub", 
                                            "137 100g100y_137 100g50y" = "lc37", 
                                            "15 100g100y_15 100g50y" = "ecok", 
                                            "16 100g100y_16 100g50y" = "pput", 
                                            "181 100g100y_181 100g50y" = "lpar", 
                                            "196 100g100y_196 100g50y" = "llcs",
                                            "1ab7 100g100y_1ab7 100g50y" = "wp18", 
                                            "28 100g100y_28 100g50y" = "atrn",
                                            "29 100g100y_29 100g50y" = "gfra", 
                                            "3 100g100y_3 100g50y" = "lfrc",
                                            "30 100g100y_30 100g50y" = "apan", 
                                            "31 100g100y_31 100g50y" = "kmed",
                                            "34 100g100y_34 100g50y" = "apnb",
                                            "35 100g100y_35 100g50y" = "aace",
                                            "39 100g100y_39 100g50y" = "apa3",
                                            "4 100g100y_4 100g50y" = "apoc",
                                            "43 100g100y_43 100g50y" = "aci5",
                                            "45 100g100y_45 100g50y" = "aori",
                                            "5 100g100y_5 100g50y" = "atrc",
                                            "52 100g100y_52 100g50y" = "cint",
                                            "56 100g100y_56 100g50y" = "galb",
                                            "6 100g100y_6 100g50y" = "amac",
                                            "66 100g100y_66 100g50y" = "lmli",
                                            "69 100g100y_69 100g50y" = "lfal",
                                            "70 100g100y_70 100g50y" = "lfrk",
                                            "73 100g100y_73 100g50y" = "lbuc",
                                            "75 100g100y_75 100g50y" = "lplw",
                                            "98 100g100y_98 100g50y" = "lsui")
                             )
                             
pref_data$trt2 <- factor(pref_data$trt2, levels = rev(c("atrn","atrc","amac","aori","aci5","aace","apan","apa3","asl5","apnb","apoc","gfra","galb","kmed","cint","ecok","pput","lmli","lplw","lpar","lbrc","lbuc","lfrk","lfrc","llcs","wp07","wp13","wp18","lfal","lsui","lc37","bsub")))

  pref_data$tt <- with(pref_data, interaction(trt2, time_consuming_diet))
  
  cat("KW test results")
## KW test results
kwtest <- kruskal.test(pref_data$value ~ pref_data$tt)
  
  if (kwtest$p.value < 0.05) {
    efg <- dunn.test(pref_data$value, pref_data$tt, method = "bh", table = F, kw=F)
    ghi <- cldList(comparison = efg$comparisons, p.value = efg$P.adjusted, threshold = 0.05)
    diff_codes <- as.character(unlist(ghi$Letter))
  } else {
    diff_codes <- c(rep("a",j*2))
  }
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
cat("Compact letter displays")
## Compact letter displays
ghi
##         Group  Letter MonoLetter
## 1  aace.dieta  abcdef    abcdef 
## 2  aace.dietb    abcd    abcd   
## 3  aci5.dieta    aefg    a   efg
## 4  aci5.dietb abcdefg    abcdefg
## 5  amac.dieta abcdefg    abcdefg
## 6  amac.dietb    abcd    abcd   
## 7  aori.dieta abcdefg    abcdefg
## 8  aori.dietb abcdefg    abcdefg
## 9  apa3.dieta abcdefg    abcdefg
## 10 apa3.dietb    abcd    abcd   
## 11 apan.dieta abcdefg    abcdefg
## 12 apan.dietb abcdefg    abcdefg
## 13 apnb.dieta abcdefg    abcdefg
## 14 apnb.dietb    abcd    abcd   
## 15 apoc.dieta abcdefg    abcdefg
## 16 apoc.dietb abcdefg    abcdefg
## 17 asl5.dieta abcdefg    abcdefg
## 18 asl5.dietb abcdefg    abcdefg
## 19 atrc.dieta  abcefg    abc efg
## 20 atrc.dietb    abcd    abcd   
## 21 atrn.dieta abcdefg    abcdefg
## 22 atrn.dietb abcdefg    abcdefg
## 23 bsub.dieta  abcefg    abc efg
## 24 bsub.dietb      bd     b d   
## 25 cint.dieta  abcefg    abc efg
## 26 cint.dietb  abcdef    abcdef 
## 27 ecok.dieta   acefg    a c efg
## 28 ecok.dietb abcdefg    abcdefg
## 29 galb.dieta abcdefg    abcdefg
## 30 galb.dietb     bcd     bcd   
## 31 gfra.dieta abcdefg    abcdefg
## 32 gfra.dietb       d       d   
## 33 kmed.dieta  abcefg    abc efg
## 34 kmed.dietb     bcd     bcd   
## 35 lbrc.dieta      eg        e g
## 36 lbrc.dietb abcdefg    abcdefg
## 37 lbuc.dieta  abcefg    abc efg
## 38 lbuc.dietb  abcefg    abc efg
## 39 lc37.dieta abcdefg    abcdefg
## 40 lc37.dietb  abcefg    abc efg
## 41 lfal.dieta     efg        efg
## 42 lfal.dietb abcdefg    abcdefg
## 43 lfrc.dieta  abcefg    abc efg
## 44 lfrc.dietb    abcd    abcd   
## 45 lfrk.dieta   acefg    a c efg
## 46 lfrk.dietb   acefg    a c efg
## 47 llcs.dieta abcdefg    abcdefg
## 48 llcs.dietb abcdefg    abcdefg
## 49 lmli.dieta abcdefg    abcdefg
## 50 lmli.dietb    abcd    abcd   
## 51 lpar.dieta abcdefg    abcdefg
## 52 lpar.dietb abcdefg    abcdefg
## 53 lplw.dieta    aefg    a   efg
## 54 lplw.dietb abcdefg    abcdefg
## 55 lsui.dieta abcdefg    abcdefg
## 56 lsui.dietb abcdefg    abcdefg
## 57 pput.dieta abcdefg    abcdefg
## 58 pput.dietb abcdefg    abcdefg
## 59  wp7.dieta       g          g
## 60  wp7.dietb   abcdf    abcd f 
## 61 wp13.dieta abcdefg    abcdefg
## 62 wp13.dietb    abcd    abcd   
## 63 wp18.dieta abcdefg    abcdefg
## 64 wp18.dietb abcdefg    abcdefg
  pd3e <- pref_data %>%
    group_by(tt) %>%
    summarize(mean = mean(value), sem= sd(value)/sqrt(dplyr::n())) %>%
    ungroup()

  cat("N per group")  
## N per group
  table(pref_data$tt)
## 
## bsub.dieta lc37.dieta lsui.dieta lfal.dieta wp18.dieta wp13.dieta wp07.dieta 
##          8          7         14         14         12         12         11 
## llcs.dieta lfrc.dieta lfrk.dieta lbuc.dieta lbrc.dieta lpar.dieta lplw.dieta 
##         10         12         13         14         12         13          9 
## lmli.dieta pput.dieta ecok.dieta cint.dieta kmed.dieta galb.dieta gfra.dieta 
##         15         13         15         21         15         11          9 
## apoc.dieta apnb.dieta asl5.dieta apa3.dieta apan.dieta aace.dieta aci5.dieta 
##         11         12         15         14         13         13         18 
## aori.dieta amac.dieta atrc.dieta atrn.dieta bsub.dietb lc37.dietb lsui.dietb 
##         16          9         15         11          8          7         14 
## lfal.dietb wp18.dietb wp13.dietb wp07.dietb llcs.dietb lfrc.dietb lfrk.dietb 
##         14         12         12         11         10         12         13 
## lbuc.dietb lbrc.dietb lpar.dietb lplw.dietb lmli.dietb pput.dietb ecok.dietb 
##         14         12         13          9         15         13         15 
## cint.dietb kmed.dietb galb.dietb gfra.dietb apoc.dietb apnb.dietb asl5.dietb 
##         21         15         11          9         11         12         15 
## apa3.dietb apan.dietb aace.dietb aci5.dietb aori.dietb amac.dietb atrc.dietb 
##         14         13         13         18         16          9         15 
## atrn.dietb 
##         11
  pd3e$tt <- gsub(pattern = "dieta", replacement = "C", x = pd3e$tt)
  pd3e$tt <- gsub(pattern = "dietb", replacement = "T", x = pd3e$tt)
  
  pd3e$tt <- as.character(pd3e$tt)
  pd3e$tt <- factor(pd3e$tt)  
 
   pd3f <- pd3e %>% 
    inner_join(ghi %>% mutate(tt = gsub(pattern = "dieta", replacement = "C", x = Group)) %>% mutate(tt = gsub(pattern = "dietb", replacement = "T", x = tt)) %>% mutate(tt = gsub(pattern = "wp7", replacement = "wp07", x = tt))) 

  pd3f$tt <- factor(pd3f$tt, levels = (c("bsub.C","bsub.T","lc37.C","lc37.T","lsui.C","lsui.T","lfal.C","lfal.T","wp18.C","wp18.T","wp13.C","wp13.T","wp07.C","wp07.T","llcs.C","llcs.T","lfrc.C","lfrc.T","lfrk.C","lfrk.T","lbuc.C","lbuc.T","lbrc.C","lbrc.T","lpar.C","lpar.T","lplw.C","lplw.T","lmli.C","lmli.T","pput.C","pput.T","ecok.C","ecok.T","cint.C","cint.T","kmed.C","kmed.T","galb.C","galb.T","gfra.C","gfra.T","apoc.C","apoc.T","apnb.C","apnb.T","asl5.C","asl5.T","apa3.C","apa3.T","apan.C","apan.T","aace.C","aace.T","aci5.C","aci5.T","aori.C","aori.T","amac.C","amac.T","atrc.C","atrc.T","atrn.C","atrn.T")))
  
  plot_text_size = 3
  
    colors2 <- rev(c(rep(c("red","red", "pink","pink"),11/2),rep(c("orange","orange", "#FFCC66","#FFCC66"),2),rep(c("dark green","dark green", "light green","light green"),1),rep(c("blue","blue", "light blue","light blue"), 4),rep(c("purple","purple", "orchid1","orchid1"),8/2)))

  all_sipduration_plot <- ggplot(pd3f, aes(x=tt, y = mean,fill = tt)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
    scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,.3))+
    theme_cowplot() +
#   theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_text(angle = 45), axis.ticks = element_blank()) +
#   theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.4), axis.ticks = element_blank()) +
        theme(axis.text.x = element_text(angle = 45, hjust=1), axis.text.y = element_text(hjust=0), text = element_text(size=32), axis.text = element_text(size=32),legend.position = "none") +

    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=.75, size=2) +
  # geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=Letter, y=mean + sem + .04), size=plot_text_size*2) + 
    coord_flip() + 
    labs(y = "Sip duration (s)")
  all_sipduration_plot

    #ggsave(h=16,w=7,"MGWA_data-sipduration_addfilter.png")

Figure S3c total sip distribution

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 347 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172716d1864.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat-ByDay.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat-ByDay.xls]
##   Date: 2022-05-30 23:20
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 372 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172315c8769.xls
abb <- read_xls(tf, sheet = "Per Day")

ccc <- aaa

j=41

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),sip1 = numeric(), sip2 = numeric(), dieta=character(),dietb=character())

  for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,c(1)]) & lp2[,c(1)]!=-1 & !is.na(lp2[,c(3)]) & lp2[,c(3)]!=-1 & !is.na(lp2[,c(4)]) & lp2[,c(4)]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time1=lp3[,3], time2 =lp3[,4]) %>%
      dplyr::select(trt,day,sipsA = time1, sipsB=time2) %>%
      mutate(total = sipsA+sipsB)
    out_df <- rbind(out_df,lp4)
  }
  
  pref_data <- out_df %>% 
    filter(sipsA<1000 & sipsB<1000) %>%
    filter(!trt%in%c("disregard-111 100g100y_disregard-111 100g50y","disregard-68 100g100y_disregard-68 100g50y","67 100g100y_67 100g50y","25 100g100y_25 100g50y","48 100g100y_48 100g50y","32 100g100y_32 100g50y","axenic 100g100y_axenic 100g50y","103 100g100y_103 100g50y","71 100g100y_71 100g50y")) %>%
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    droplevels() %>%
    reshape2::melt(measure.vars = c("sipsA", "sipsB"),variable.name = "time_consuming_diet")

    pref_data$trt2 <- plyr::revalue(pref_data$trt, c("1 100g100y_1 100g50y" = "lbrc",
                                            "11cb6 100g100y_11cb6 100g50y" = "wp07",
                                            "11cb7 100g100y_11cb7 100g50y" = "wp13",
                                            "11ct2 100g100y_11ct2 100g50y"="asl5",
                                            "13 100g100y_13 100g50y"="bsub", 
                                            "137 100g100y_137 100g50y" = "lc37", 
                                            "15 100g100y_15 100g50y" = "ecok", 
                                            "16 100g100y_16 100g50y" = "pput", 
                                            "181 100g100y_181 100g50y" = "lpar", 
                                            "196 100g100y_196 100g50y" = "llcs",
                                            "1ab7 100g100y_1ab7 100g50y" = "wp18", 
                                            "28 100g100y_28 100g50y" = "atrn",
                                            "29 100g100y_29 100g50y" = "gfra", 
                                            "3 100g100y_3 100g50y" = "lfrc",
                                            "30 100g100y_30 100g50y" = "apan", 
                                            "31 100g100y_31 100g50y" = "kmed",
                                            "34 100g100y_34 100g50y" = "apnb",
                                            "35 100g100y_35 100g50y" = "aace",
                                            "39 100g100y_39 100g50y" = "apa3",
                                            "4 100g100y_4 100g50y" = "apoc",
                                            "43 100g100y_43 100g50y" = "aci5",
                                            "45 100g100y_45 100g50y" = "aori",
                                            "5 100g100y_5 100g50y" = "atrc",
                                            "52 100g100y_52 100g50y" = "cint",
                                            "56 100g100y_56 100g50y" = "galb",
                                            "6 100g100y_6 100g50y" = "amac",
                                            "66 100g100y_66 100g50y" = "lmli",
                                            "69 100g100y_69 100g50y" = "lfal",
                                            "70 100g100y_70 100g50y" = "lfrk",
                                            "73 100g100y_73 100g50y" = "lbuc",
                                            "75 100g100y_75 100g50y" = "lplw",
                                            "98 100g100y_98 100g50y" = "lsui")
                             )
                             
pref_data$trt2 <- factor(pref_data$trt2, levels = rev(c("atrn","atrc","amac","aori","aci5","aace","apan","apa3","asl5","apnb","apoc","gfra","galb","kmed","cint","ecok","pput","lmli","lplw","lpar","lbrc","lbuc","lfrk","lfrc","llcs","wp07","wp13","wp18","lfal","lsui","lc37","bsub")))

  pref_data$tt <- with(pref_data, interaction(trt2, time_consuming_diet))
  
  cat("KW test results")
## KW test results
kwtest <- kruskal.test(pref_data$value ~ pref_data$tt)
print(kwtest)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 89.152, df = 63, p-value = 0.01675
table(pref_data$trt)
## 
##         1 100g100y_1 100g50y 11cb6 100g100y_11cb6 100g50y 
##                           24                           24 
## 11cb7 100g100y_11cb7 100g50y 11ct2 100g100y_11ct2 100g50y 
##                           26                           32 
##       13 100g100y_13 100g50y     137 100g100y_137 100g50y 
##                           16                           18 
##       15 100g100y_15 100g50y       16 100g100y_16 100g50y 
##                           32                           26 
##     181 100g100y_181 100g50y     196 100g100y_196 100g50y 
##                           28                           20 
##   1ab7 100g100y_1ab7 100g50y       28 100g100y_28 100g50y 
##                           26                           22 
##       29 100g100y_29 100g50y         3 100g100y_3 100g50y 
##                           22                           24 
##       30 100g100y_30 100g50y       31 100g100y_31 100g50y 
##                           28                           34 
##       34 100g100y_34 100g50y       35 100g100y_35 100g50y 
##                           26                           26 
##       39 100g100y_39 100g50y         4 100g100y_4 100g50y 
##                           30                           24 
##       43 100g100y_43 100g50y       45 100g100y_45 100g50y 
##                           42                           34 
##         5 100g100y_5 100g50y       52 100g100y_52 100g50y 
##                           34                           44 
##       56 100g100y_56 100g50y         6 100g100y_6 100g50y 
##                           26                           26 
##       66 100g100y_66 100g50y       69 100g100y_69 100g50y 
##                           32                           32 
##       70 100g100y_70 100g50y       73 100g100y_73 100g50y 
##                           28                           34 
##       75 100g100y_75 100g50y       98 100g100y_98 100g50y 
##                           18                           36
  if (kwtest$p.value < 0.05) {
    efg <- dunn.test(pref_data$value, pref_data$tt, method = "bh", table = F, kw=F)
    ghi <- cldList(comparison = efg$comparisons, p.value = efg$P.adjusted, threshold = 0.05)
    diff_codes <- as.character(unlist(ghi$Letter))
    write.csv(data.frame(efg) %>% filter(grepl(pattern = "WT", x = comparisons)), "figs3c_significance.csv")

  } else {
    diff_codes <- c(rep("a",j*2))
  }
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
cat("Compact letter displays")
## Compact letter displays
ghi
##         Group Letter MonoLetter
## 1  aace.sipsA      a          a
## 2  aace.sipsB      a          a
## 3  aci5.sipsA      a          a
## 4  aci5.sipsB      a          a
## 5  amac.sipsA      a          a
## 6  amac.sipsB      a          a
## 7  aori.sipsA      a          a
## 8  aori.sipsB      a          a
## 9  apa3.sipsA      a          a
## 10 apa3.sipsB      a          a
## 11 apan.sipsA      a          a
## 12 apan.sipsB      a          a
## 13 apnb.sipsA      a          a
## 14 apnb.sipsB      a          a
## 15 apoc.sipsA      a          a
## 16 apoc.sipsB      a          a
## 17 asl5.sipsA      a          a
## 18 asl5.sipsB      a          a
## 19 atrc.sipsA      a          a
## 20 atrc.sipsB      a          a
## 21 atrn.sipsA      a          a
## 22 atrn.sipsB      a          a
## 23 bsub.sipsA      a          a
## 24 bsub.sipsB      a          a
## 25 cint.sipsA      a          a
## 26 cint.sipsB      a          a
## 27 ecok.sipsA      a          a
## 28 ecok.sipsB      a          a
## 29 galb.sipsA      a          a
## 30 galb.sipsB      a          a
## 31 gfra.sipsA      a          a
## 32 gfra.sipsB      a          a
## 33 kmed.sipsA      a          a
## 34 kmed.sipsB      a          a
## 35 lbrc.sipsA      a          a
## 36 lbrc.sipsB      a          a
## 37 lbuc.sipsA      a          a
## 38 lbuc.sipsB      a          a
## 39 lc37.sipsA      a          a
## 40 lc37.sipsB      a          a
## 41 lfal.sipsA      a          a
## 42 lfal.sipsB      a          a
## 43 lfrc.sipsA      a          a
## 44 lfrc.sipsB      a          a
## 45 lfrk.sipsA      a          a
## 46 lfrk.sipsB      a          a
## 47 llcs.sipsA      a          a
## 48 llcs.sipsB      a          a
## 49 lmli.sipsA      a          a
## 50 lmli.sipsB      a          a
## 51 lpar.sipsA      a          a
## 52 lpar.sipsB      a          a
## 53 lplw.sipsA      a          a
## 54 lplw.sipsB      a          a
## 55 lsui.sipsA      a          a
## 56 lsui.sipsB      a          a
## 57 pput.sipsA      a          a
## 58 pput.sipsB      a          a
## 59  wp7.sipsA      a          a
## 60  wp7.sipsB      a          a
## 61 wp13.sipsA      a          a
## 62 wp13.sipsB      a          a
## 63 wp18.sipsA      a          a
## 64 wp18.sipsB      a          a
  pd3e <- pref_data %>%
    group_by(tt) %>%
    summarize(mean = mean(value), sem= sd(value)/sqrt(dplyr::n())) %>%
    ungroup()

  cat("N per group")  
## N per group
  table(pref_data$tt)
## 
## bsub.sipsA lc37.sipsA lsui.sipsA lfal.sipsA wp18.sipsA wp13.sipsA wp07.sipsA 
##          8          9         18         16         13         13         12 
## llcs.sipsA lfrc.sipsA lfrk.sipsA lbuc.sipsA lbrc.sipsA lpar.sipsA lplw.sipsA 
##         10         12         14         17         12         14          9 
## lmli.sipsA pput.sipsA ecok.sipsA cint.sipsA kmed.sipsA galb.sipsA gfra.sipsA 
##         16         13         16         22         17         13         11 
## apoc.sipsA apnb.sipsA asl5.sipsA apa3.sipsA apan.sipsA aace.sipsA aci5.sipsA 
##         12         13         16         15         14         13         21 
## aori.sipsA amac.sipsA atrc.sipsA atrn.sipsA bsub.sipsB lc37.sipsB lsui.sipsB 
##         17         13         17         11          8          9         18 
## lfal.sipsB wp18.sipsB wp13.sipsB wp07.sipsB llcs.sipsB lfrc.sipsB lfrk.sipsB 
##         16         13         13         12         10         12         14 
## lbuc.sipsB lbrc.sipsB lpar.sipsB lplw.sipsB lmli.sipsB pput.sipsB ecok.sipsB 
##         17         12         14          9         16         13         16 
## cint.sipsB kmed.sipsB galb.sipsB gfra.sipsB apoc.sipsB apnb.sipsB asl5.sipsB 
##         22         17         13         11         12         13         16 
## apa3.sipsB apan.sipsB aace.sipsB aci5.sipsB aori.sipsB amac.sipsB atrc.sipsB 
##         15         14         13         21         17         13         17 
## atrn.sipsB 
##         11
  pd3e$tt <- gsub(pattern = "sipsA", replacement = "C", x = pd3e$tt)
  pd3e$tt <- gsub(pattern = "sipsB", replacement = "T", x = pd3e$tt)
  
  pd3e$tt <- as.character(pd3e$tt)
  pd3e$tt <- factor(pd3e$tt)  
 
   pd3f <- pd3e %>% 
    inner_join(ghi %>% mutate(tt = gsub(pattern = "sipsA", replacement = "C", x = Group)) %>% mutate(tt = gsub(pattern = "sipsB", replacement = "T", x = tt)) %>% mutate(tt = gsub(pattern = "wp7", replacement = "wp07", x = tt))) 

  pd3f$tt <- factor(pd3f$tt, levels = (c("bsub.C","bsub.T","lc37.C","lc37.T","lsui.C","lsui.T","lfal.C","lfal.T","wp18.C","wp18.T","wp13.C","wp13.T","wp07.C","wp07.T","llcs.C","llcs.T","lfrc.C","lfrc.T","lfrk.C","lfrk.T","lbuc.C","lbuc.T","lbrc.C","lbrc.T","lpar.C","lpar.T","lplw.C","lplw.T","lmli.C","lmli.T","pput.C","pput.T","ecok.C","ecok.T","cint.C","cint.T","kmed.C","kmed.T","galb.C","galb.T","gfra.C","gfra.T","apoc.C","apoc.T","apnb.C","apnb.T","asl5.C","asl5.T","apa3.C","apa3.T","apan.C","apan.T","aace.C","aace.T","aci5.C","aci5.T","aori.C","aori.T","amac.C","amac.T","atrc.C","atrc.T","atrn.C","atrn.T")))
  
  plot_text_size = 3
  
    colors2 <- rev(c(rep(c("red","red", "pink","pink"),11/2),rep(c("orange","orange", "#FFCC66","#FFCC66"),2),rep(c("dark green","dark green", "light green","light green"),1),rep(c("blue","blue", "light blue","light blue"), 4),rep(c("purple","purple", "orchid1","orchid1"),8/2)))

  all_sipdistribution_plot <- ggplot(pd3f, aes(x=tt, y = mean,fill = tt)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
    scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,350))+
    theme_cowplot() +
#   theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_text(angle = 45), axis.ticks = element_blank()) +
#   theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.4), axis.ticks = element_blank()) +
        theme(axis.text.x = element_text(angle = 45, hjust=1), axis.text.y = element_text(hjust=0), text = element_text(size=32), axis.text = element_text(size=32),legend.position = "none") +

    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=.75, size=2) +
  # geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=Letter, y=mean + sem + 30), size=plot_text_size*2) + 
    coord_flip() + 
    labs(y = "Sip duration (s)")

#ggsave(h=16,w=7,"MGWA_data-sipdistribution_addfilter.png", bg = "white")

MGWA, including Fig S4 QQs

pd2 <- read.csv(url("https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/lookup_code_mgwa2.csv")) %>% inner_join(read.csv(url("https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/pref_data.csv")), by=c("trt")) %>% filter(!is.na(time)) %>% droplevels()

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$time2 <- as.factor(as.character(pd2$time))
pd2$code <- as.factor(as.character(pd2$code))
pd2$td <- paste0(pd2$time,pd2$day)

efg <- read.csv(url('https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/fig2-efg.csv'))

pd3 <- cbind(pd2, efg %>% dplyr::select(-X)) %>% mutate(testcol = paste0(trt, X2,code))

parsed_data <- FormatAfterOrtho(url('https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/groups2020.txt'), format="groups")
## reading in OrthoMCL data...   done
## creating list of OG proteins...  done
## creating presence absence matrix...  done
## 
## Exported list with Presence/Absence Matrix & list of OG Proteins
# Unpound the single pounds to run code. pounded out now because it can be time  consuming, avoids long stalls on install. 

# mcl_matrixC <- AnalyzeOrthoMCL(mcl_data = parsed_data, 
#                       pheno_data = pd3,
#                        model = 'lmeR2ind',
#                        species_name = 'mgwa2',
#                        resp = 'X1',
#                        rndm1='day',
#                        rndm2='time',
#                        princ_coord = 0.5)
# # Fig S4A
# QQPlotter(mcl_matrixC)
# # doesn't run to avoid specifying each individual folder path, but can download from github files folder and program the folder locally. Change the 'mgwa2/precompliantFasta3/' to the directory containing the compliant fasta files you download from github.
# # joined_matrixC <- JoinRepSeq(parsed_data, 'mgwa2/precompliantfasta3/', mcl_matrixC, fastaformat = 'old')
# # setwd('../..')
# # WriteMCL(joined_matrixC,"lmeR2ind_binomialdata_1020.csv")

# mcl_matrixB <- AnalyzeOrthoMCL(mcl_data = parsed_data, 
#                       pheno_data = pd3,
#                        model = 'wx',
#                        species_name = 'mgwa2',
#                        resp = 'X1',
#                        princ_coord = 0.5)
# 
# # Fig S4B
# QQPlotter(mcl_matrixB)
# # doesn't run to avoid specifying each individual folder path, but can download from github files folder and program the folder locally. Change the 'mgwa2/precompliantFasta3/' to the directory containing the compliant fasta files you download from github.
# #joined_matrixB <- JoinRepSeq(parsed_data, 'mgwa2/precompliantFasta3/', mcl_matrixB, fastaformat = 'old')
# #setwd('../..')
# # WriteMCL(joined_matrixB,"wilcox_binomial_10_20.csv")

Figure 3

Figure 3

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat-all%20together%20with%20dayandtime.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat-all%20together%20with%20dayandtime.xls]
##   Date: 2022-05-30 23:21
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 163 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea17253c48e81.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
bbb <- read_xls(tf, sheet = "SipDurations")
abb <- read_xls(tf, sheet = "NumberOfSipsByDay")
aab <- read_xls(tf, sheet = "NumberOfSipsByDay")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
num_t=16
for(i in 1:num_t) {
    lacto_pref <- (ccc[,i]/(ccc[,(num_t+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(num_t+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+num_t])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pd2 <- out_df %>% 
    mutate(trt=gsub(pattern = " control", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " halfyeast", replacement = "",x = trt,perl = T)) %>%
    filter(!trt%in%c("disregard-111_disregard-111","disregard-68_disregard-68")) %>%
    mutate(trt=factor(trt)) %>%
    filter(!trt%in%c("axenic_axenic","67_67","25_25","48_48","32_32")) %>%
    filter(dieta<1000 & dietb<1000) %>%
  filter(trt %in% c("T54_T54", "T03_T03", "T10_T10", "T14_T14", "T18_T18" ,"T19_T19", "T20_T20", "T23_T23", "T28_T28")) %>%
    droplevels()

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

pd2$trt <- factor(pd2$trt,levels=c( "T54_T54", "T03_T03", "T10_T10", "T14_T14", "T18_T18" ,"T19_T19", "T20_T20","T23_T23", "T28_T28"))

abc <- glmer(cbind(dieta,dietb) ~ trt + (1|day2), data=pd2, family = "binomial")
cat("Statistics summary")
## Statistics summary
summary(abc)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: cbind(dieta, dietb) ~ trt + (1 | day2)
##    Data: pd2
## 
##      AIC      BIC   logLik deviance df.resid 
##   4795.2   4825.2  -2387.6   4775.2      139 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -14.6249  -2.9944  -0.4187   2.9542  15.0592 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  day2   (Intercept) 0.1833   0.4281  
## Number of obs: 149, groups:  day2, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.51321    0.15820  -3.244  0.00118 ** 
## trtT03_T03   0.06679    0.06656   1.003  0.31566    
## trtT10_T10   0.54860    0.06570   8.350  < 2e-16 ***
## trtT14_T14  -0.52003    0.08430  -6.169 6.88e-10 ***
## trtT18_T18  -0.26864    0.06147  -4.371 1.24e-05 ***
## trtT19_T19   0.33182    0.06186   5.364 8.15e-08 ***
## trtT20_T20   0.02431    0.07861   0.309  0.75714    
## trtT23_T23   0.09520    0.06264   1.520  0.12857    
## trtT28_T28  -0.15225    0.06252  -2.435  0.01488 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr) tT03_T tT10_T tT14_T tT18_T tT19_T tT20_T tT23_T
## trtT03_T03 -0.208                                                 
## trtT10_T10 -0.212  0.533                                          
## trtT14_T14 -0.159  0.393  0.400                                   
## trtT18_T18 -0.221  0.537  0.544  0.401                            
## trtT19_T19 -0.211  0.509  0.525  0.403  0.550                     
## trtT20_T20 -0.172  0.426  0.448  0.313  0.451  0.420              
## trtT23_T23 -0.217  0.554  0.538  0.414  0.545  0.544  0.428       
## trtT28_T28 -0.216  0.535  0.532  0.409  0.575  0.541  0.433  0.547
def <- summary(abc)
abc3 <- glht(abc, mcp(trt='Dunnett'))
cat("Dunnett test summary")
## Dunnett test summary
summary(abc3)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Dunnett Contrasts
## 
## 
## Fit: glmer(formula = cbind(dieta, dietb) ~ trt + (1 | day2), data = pd2, 
##     family = "binomial")
## 
## Linear Hypotheses:
##                        Estimate Std. Error z value Pr(>|z|)    
## T03_T03 - T54_T54 == 0  0.06679    0.06656   1.003   0.8884    
## T10_T10 - T54_T54 == 0  0.54860    0.06570   8.350   <0.001 ***
## T14_T14 - T54_T54 == 0 -0.52003    0.08430  -6.169   <0.001 ***
## T18_T18 - T54_T54 == 0 -0.26864    0.06147  -4.371   <0.001 ***
## T19_T19 - T54_T54 == 0  0.33182    0.06186   5.364   <0.001 ***
## T20_T20 - T54_T54 == 0  0.02431    0.07861   0.309   0.9999    
## T23_T23 - T54_T54 == 0  0.09520    0.06264   1.520   0.5316    
## T28_T28 - T54_T54 == 0 -0.15225    0.06252  -2.435   0.0886 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
## creating plot
abc1 <- glht(abc, mcp(trt='Tukey'))
abc2 <- cld(abc1)
efg <- data.frame(cbind(abc2$lp, abc2$x,abc2$xname)) %>% mutate(X1 = as.numeric(as.character(X1)))
str(efg)
## 'data.frame':    149 obs. of  3 variables:
##  $ X1: num  0.338 0.253 0.329 0.324 0.42 ...
##  $ X2: chr  "1" "1" "1" "1" ...
##  $ X3: chr  "trt" "trt" "trt" "trt" ...
fgh <- efg %>% group_by(X2) %>% dplyr::summarize(mean = mean(X1), sem = sd(X1)/sqrt(sum(X1>-1)))

pd3e <- data.frame(abc2$lp, abc2$x) %>% group_by(abc2.x) %>% dplyr::summarize(mean = mean(abc2.lp), sem=sd(abc2.lp)/sqrt(sum(abc2.lp<2)))
colors2 <- c("gray")

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("T54_T54" ,"T03_T03", "T10_T10", "T14_T14", "T18_T18" ,"T19_T19" ,"T20_T20", "T23_T23", "T28_T28"))
pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("T54_T54" = "WT",
                                            "T03_T03" = "NitT/TauT",# NS
                                            "T10_T10" = "thiE",#***
                                            "T14_T14" = "oprB",#***
                                            "T18_T18" = "permease",#NS
                                            "T19_T19" = "iscS",#***
                                            "T20_T20" = "nuoA",#NS
                                            "T23_T23" = "ALDH",#NS
                                            "T28_T28" = "Hypothetical")# 
                             )

sigvals <- c("","","***","***","***","***","","","")

ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(pd3e$abc2.x))))), yend=mean)) + 
    geom_segment(stat="identity", size = 8,col="red") + #size=24, 
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,1))+
    theme_cowplot() + 
    theme(axis.text.x = element_text(angle = 45, hjust=1)) +#text = element_text(size=32), axis.text = element_text(size=32), 
    geom_errorbar(ymax =pd3e$mean+pd3e$sem, ymin = pd3e$mean-pd3e$sem, width=0.25, size=.75) +
    labs(x="Bacterial mutant",y = "Preference index") + 
  annotate(geom="label", x= 0, y= 0.02, label = "5%Y 10%G",color="green1",  hjust = 0, fill="white", label.size=0) +#size=12, 
  annotate(geom="label", x= 0, y= 1, label = "10%Y 10%G",color="blue", hjust = 0, fill="white", label.size=0) + #size=12, 
    geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=sigvals, y=.6)) #, size=12

x=.6
#ggsave(h=5.17*x,w=6.64*x,"mutant_validation-2_27.png")

Figure S5A feeding totals

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat-all%20together%20with%20dayandtime.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat-all%20together%20with%20dayandtime.xls]
##   Date: 2022-05-30 23:21
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 163 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1721f2e2672.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
bbb <- read_xls(tf, sheet = "SipDurations")
abb <- read_xls(tf, sheet = "NumberOfSipsByDay")
aab <- read_xls(tf, sheet = "NumberOfSipsByDay")
ccc <- aaa

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())
num_t=16
for(i in 1:num_t) {
    lacto_pref <- (ccc[,i]/(ccc[,(num_t+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aaa[,i],aaa[,(num_t+i)])
    lp3 <- lp2[(!is.na(lp2[,1]) & lp2[,1]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+num_t])), pref=lp3[,1], day=lp3[,2], time=lp3[,3],dieta=lp3[,4],dietb=lp3[,5]) %>% dplyr::select(trt,pref,day,dieta,dietb)
    out_df <- rbind(out_df,lp4)
}

pd2 <- out_df %>% 
    mutate(trt=gsub(pattern = " control", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " halfyeast", replacement = "",x = trt,perl = T)) %>%
    filter(!trt%in%c("disregard-111_disregard-111","disregard-68_disregard-68")) %>%
    mutate(trt=factor(trt)) %>%
    filter(!trt%in%c("axenic_axenic","67_67","25_25","48_48","32_32")) %>%
    filter(dieta<1000 & dietb<1000) %>%
  filter(trt %in% c("T54_T54", "T03_T03", "T10_T10", "T14_T14", "T18_T18" ,"T19_T19", "T20_T20", "T23_T23", "T28_T28")) %>%
  mutate(total = dieta+dietb) %>%
    droplevels()

pd2$day2 <- as.factor(as.character(pd2$day))
pd2$trt <- as.factor(as.character(pd2$trt))

pd2$trt <- factor(pd2$trt,levels=c( "T54_T54", "T03_T03", "T10_T10", "T14_T14", "T18_T18" ,"T19_T19", "T20_T20","T23_T23", "T28_T28"))

cat("KW test")
## KW test
kruskal.test(pd2$total ~ pd2$trt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pd2$total by pd2$trt
## Kruskal-Wallis chi-squared = 11.461, df = 8, p-value = 0.1769
cat("N per group")
## N per group
table(pd2$trt)
## 
## T54_T54 T03_T03 T10_T10 T14_T14 T18_T18 T19_T19 T20_T20 T23_T23 T28_T28 
##      20      14      13      15      20      16      14      16      21
pd3e <- pd2 %>%
  group_by(trt) %>%
  summarize(mean = mean(total), sem= sd(total)/sqrt(dplyr::n())) %>%
  ungroup()

pd3e$abc2.x <- factor(pd3e$trt, levels = c("T54_T54" ,"T03_T03", "T10_T10", "T14_T14", "T18_T18" ,"T19_T19" ,"T20_T20", "T23_T23", "T28_T28"))

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("T54_T54" = "WT",
                                            "T03_T03" = "NitT/TauT",# NS
                                            "T10_T10" = "thiE",#***
                                            "T14_T14" = "oprB",#***
                                            "T18_T18" = "permease",#NS
                                            "T19_T19" = "iscS",#***
                                            "T20_T20" = "nuoA",#NS
                                            "T23_T23" = "ALDH",#NS
                                            "T28_T28" = "Hypothetical")# 
                             )

ggplot(pd3e, aes(x = abc2.x, xend=abc2.x, y=c(rep(0.5,length(table(list(pd3e$abc2.x))))), yend=mean)) + 
    geom_segment(stat="identity", size = 8,col="red") + #size=24, 
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,250))+
    theme_cowplot() + 
    theme(axis.text.x = element_text(angle = 45, hjust=1)) +#text = element_text(size=32), axis.text = element_text(size=32), 
    geom_errorbar(aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=.75) +
    labs(x="Bacterial mutant",y = "# of sips") 

#   geom_text(aes(label=sigvals, y=mean + sem + 20)) #, size=12

x=.6
#ggsave(h=5.17*x,w=6.64*x,"mutant_validation_totals-2_27.png")

Figure S5B sip duration

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat-all%20together%20with%20dayandtime.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat-all%20together%20with%20dayandtime.xls]
##   Date: 2022-05-30 23:21
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 163 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172168df9a4.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "NumberOfSipsByDay")
aab <- read_xls(tf, sheet = "SipDurations")[1:22,]
ccc <- aaa
j=16

out_df <- data.frame(trt=character(), pref=numeric(),day=numeric(),time=numeric(),dieta=character(),dietb=character())

 for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aab[,i],aab[,(i+j)],aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,c(1)]) & lp2[,c(1)]!=-1 & !is.na(lp2[,c(3)]) & lp2[,c(3)]!=-1 & !is.na(lp2[,c(4)]) & lp2[,c(4)]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time1=lp3[,3], time2 =lp3[,4],dieta=lp3[,5], dietb = lp3[,6], ) %>% dplyr::select(trt,pref,day,time = time1, sip1=dieta, sip2 = dietb, dieta=time1, dietb=time2) 
    out_df <- rbind(out_df,lp4)
  }
 
pd2 <- out_df %>% 
    droplevels()

pref_data <- out_df %>% 
    filter(sip1<1000 & sip2<1000) %>%
    mutate(trt=gsub(pattern = " control", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " halfyeast", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
  filter(trt %in% c("T54_T54", "T03_T03", "T10_T10", "T14_T14", "T18_T18" ,"T19_T19", "T20_T20", "T23_T23", "T28_T28")) %>%
  mutate(total = dieta+dietb) %>%
 droplevels() %>%
 reshape2::melt(measure.vars = c("dieta", "dietb"),variable.name = "time_consuming_diet")

pref_data$trt2 <- factor(pref_data$trt, levels = rev(c("T54_T54" ,"T03_T03", "T10_T10", "T14_T14", "T18_T18" ,"T19_T19" ,"T20_T20", "T23_T23", "T28_T28")))

pref_data$trt2 <- plyr::revalue(pref_data$trt2, c("T54_T54" = "WT",
                                            "T03_T03" = "NitT/TauT",# NS
                                            "T10_T10" = "thiE",#***
                                            "T14_T14" = "oprB",#***
                                            "T18_T18" = "permease",#NS
                                            "T19_T19" = "iscS",#***
                                            "T20_T20" = "nuoA",#NS
                                            "T23_T23" = "ALDH",#NS
                                            "T28_T28" = "Hypothetical")
                         )
                             

  pref_data$tt <- with(pref_data, interaction(trt2, time_consuming_diet))

  cat("KW test")
## KW test
  kwtest <- kruskal.test(pref_data$value ~ pref_data$tt)
  print(kwtest)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 60.693, df = 17, p-value = 8.072e-07
  if (kwtest$p.value < 0.05) {
    efg <- dunn.test(pref_data$value, pref_data$tt, method = "bh", table = F, kw=F)
    ghi <- cldList(comparison = efg$comparisons, p.value = efg$P.adjusted, threshold = 0.05)
    diff_codes <- as.character(unlist(ghi$Letter))
  } else {
    diff_codes <- c(rep("a",j*2))
  }
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
  cat("Compact letter displays")
## Compact letter displays
  ghi
##                 Group Letter MonoLetter
## 1          ALDH.dieta    abc     abc   
## 2          ALDH.dietb  abdef     ab def
## 3  Hypothetical.dieta     ac     a c   
## 4  Hypothetical.dietb  abdef     ab def
## 5          iscS.dieta    abc     abc   
## 6          iscS.dietb    def        def
## 7     NitT/TauT.dieta    abc     abc   
## 8     NitT/TauT.dietb      d        d  
## 9          nuoA.dieta    abc     abc   
## 10         nuoA.dietb     de        de 
## 11         oprB.dieta      c       c   
## 12         oprB.dietb   bdef      b def
## 13     permease.dieta   abef     ab  ef
## 14     permease.dietb     de        de 
## 15         thiE.dieta  abcef     abc ef
## 16         thiE.dietb  abdef     ab def
## 17           WT.dieta    abf     ab   f
## 18           WT.dietb      d        d
  cat("N per group")
## N per group
  table(pref_data$tt)
## 
## Hypothetical.dieta         ALDH.dieta         nuoA.dieta         iscS.dieta 
##                 19                 16                 14                 16 
##     permease.dieta         oprB.dieta         thiE.dieta    NitT/TauT.dieta 
##                 20                 15                 12                 14 
##           WT.dieta Hypothetical.dietb         ALDH.dietb         nuoA.dietb 
##                 19                 19                 16                 14 
##         iscS.dietb     permease.dietb         oprB.dietb         thiE.dietb 
##                 16                 20                 15                 12 
##    NitT/TauT.dietb           WT.dietb 
##                 14                 19
  pd3e <- pref_data %>%
    group_by(tt) %>%
    summarize(mean = mean(value), sem= sd(value)/sqrt(dplyr::n())) %>%
    ungroup()
  
  colors2 <- rev(c(rep(c("red","red", "pink","pink"),5)))
  
  pd3e$tt <- gsub(pattern = "dieta", replacement = "C", x = pd3e$tt)
  pd3e$tt <- gsub(pattern = "dietb", replacement = "T", x = pd3e$tt)
  
  pd3e$tt <- as.character(pd3e$tt)
  pd3e$tt <- factor(pd3e$tt)  
 
   pd3f <- pd3e %>% 
    inner_join(ghi %>% mutate(tt = gsub(pattern = "dieta", replacement = "C", x = Group)) %>% mutate(tt = gsub(pattern = "dietb", replacement = "T", x = tt)) %>% mutate(tt = gsub(pattern = "wp7", replacement = "wp07", x = tt))) 

   pd3f$tt <- factor(pd3f$tt, levels = (c("WT.C","WT.T","NitT/TauT.C","NitT/TauT.T","thiE.C","thiE.T","oprB.C","oprB.T","permease.C","permease.T","iscS.C","iscS.T","nuoA.C","nuoA.T","ALDH.C","ALDH.T","Hypothetical.C","Hypothetical.T")))
  
  plot_text_size = 3
  fig3_sipduration_plot <- ggplot(pd3f, aes(x=tt, y = mean,fill = tt)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
    scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,.3))+
    theme_cowplot() +
#   theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_text(angle = 45), axis.ticks = element_blank()) +
#       theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.ticks = element_blank()) +
        theme(legend.position = "none", axis.ticks = element_blank()) +
    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=.375) +
        theme(axis.text.x = element_text(angle = 45, hjust=1)) +#text = element_text(size=32), axis.text = element_text(size=32), 
  # geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=Letter, y=mean + sem + .015), size=plot_text_size, angle=90, hjust=0) + 
    labs(y = "Sip duration (s)", x = "Bacterial mutant")
  fig3_sipduration_plot

x=.6
ggsave(h=5.17*x,w=6.64*x,"fig3-sipduration_addfilter.png")

Figure S5C sip duration

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/DataInExcelFormat-all%20together%20with%20dayandtime.xls?raw=true", write_disk(tf <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/DataInExcelFormat-all%20together%20with%20dayandtime.xls]
##   Date: 2022-05-30 23:21
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 163 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1725846aa58.xls
aaa <- read_xls(tf, sheet = "NumberOfSips")
abb <- read_xls(tf, sheet = "NumberOfSipsByDay")
ccc <- aaa
j=16

out_df <- data.frame(trt=character(), day=numeric(),sipsA = numeric(), sipsB = numeric(), total=numeric())

  for(i in 1:j) {
    lacto_pref <- (ccc[,i]/(ccc[,(j+i)]+ccc[,i]))
    lp2 <- cbind(lacto_pref,abb[,(i)], aaa[,i],aaa[,(j+i)])
    lp3 <- lp2[(!is.na(lp2[,c(1)]) & lp2[,c(1)]!=-1 & !is.na(lp2[,c(3)]) & lp2[,c(3)]!=-1 & !is.na(lp2[,c(4)]) & lp2[,c(4)]!=-1),]
    lp4 <- data.frame(lp3) %>% mutate(trt=paste0(colnames(ccc[i]),"_",colnames(ccc[i+j])), pref=lp3[,1], day=lp3[,2], time1=lp3[,3], time2 =lp3[,4]) %>%
      dplyr::select(trt,day,sipsA = time1, sipsB=time2) %>%
      mutate(total = sipsA+sipsB)
    out_df <- rbind(out_df,lp4)
  }
  
  pref_data <- out_df %>% 
    filter(sipsA<1000 & sipsB<1000) %>%
    filter(!trt%in%c("disregard-111 100g100y_disregard-111 100g50y","disregard-68 100g100y_disregard-68 100g50y","67 100g100y_67 100g50y","25 100g100y_25 100g50y","48 100g100y_48 100g50y","32 100g100y_32 100g50y","axenic 100g100y_axenic 100g50y","103 100g100y_103 100g50y","71 100g100y_71 100g50y")) %>%
    mutate(trt=gsub(pattern = " 10Y10G", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " 10Y7.5G", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
    droplevels() %>%
    reshape2::melt(measure.vars = c("sipsA", "sipsB"),variable.name = "time_consuming_diet")

pd2 <- out_df %>% 
    droplevels()

pref_data <- out_df %>% 
    filter(sipsA<1000 & sipsB<1000) %>%
    mutate(trt=gsub(pattern = " control", replacement = "",x = trt,perl = T)) %>% 
    mutate(trt=gsub(pattern = " halfyeast", replacement = "",x = trt,perl = T)) %>%
    mutate(trt=factor(trt)) %>%
  filter(trt %in% c("T54_T54", "T03_T03", "T10_T10", "T14_T14", "T18_T18" ,"T19_T19", "T20_T20", "T23_T23", "T28_T28")) %>%
 droplevels() %>%
 reshape2::melt(measure.vars = c("sipsA", "sipsB"),variable.name = "time_consuming_diet")

pref_data$trt2 <- factor(pref_data$trt, levels = rev(c("T54_T54" ,"T03_T03", "T10_T10", "T14_T14", "T18_T18" ,"T19_T19" ,"T20_T20", "T23_T23", "T28_T28")))

pref_data$trt2 <- plyr::revalue(pref_data$trt2, c("T54_T54" = "WT",
                                            "T03_T03" = "NitT/TauT",# NS
                                            "T10_T10" = "thiE",#***
                                            "T14_T14" = "oprB",#***
                                            "T18_T18" = "permease",#NS
                                            "T19_T19" = "iscS",#***
                                            "T20_T20" = "nuoA",#NS
                                            "T23_T23" = "ALDH",#NS
                                            "T28_T28" = "Hypothetical")
                         )
                             

  pref_data$tt <- with(pref_data, interaction(trt2, time_consuming_diet))

  cat("KW test")
## KW test
  kwtest <- kruskal.test(pref_data$value ~ pref_data$tt)
  print(kwtest)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  pref_data$value by pref_data$tt
## Kruskal-Wallis chi-squared = 50.257, df = 17, p-value = 3.853e-05
  if (kwtest$p.value < 0.05) {
    efg <- dunn.test(pref_data$value, pref_data$tt, method = "bh", table = F, kw=F)
    ghi <- cldList(comparison = efg$comparisons, p.value = efg$P.adjusted, threshold = 0.05)
    diff_codes <- as.character(unlist(ghi$Letter))
    write.csv(data.frame(efg) %>% filter(grepl(pattern = "WT", x = comparisons)), "figs5c_significance.csv")
  } else {
    diff_codes <- c(rep("a",j*2))
  }
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
  cat("Compact letter displays")
## Compact letter displays
  ghi
##                 Group Letter MonoLetter
## 1          ALDH.sipsA    abc      abc  
## 2          ALDH.sipsB      d         d 
## 3  Hypothetical.sipsA     ab      ab   
## 4  Hypothetical.sipsB   acde      a cde
## 5          iscS.sipsA   acde      a cde
## 6          iscS.sipsB     de         de
## 7     NitT/TauT.sipsA   acde      a cde
## 8     NitT/TauT.sipsB      d         d 
## 9          nuoA.sipsA   abce      abc e
## 10         nuoA.sipsB    cde        cde
## 11         oprB.sipsA      b       b   
## 12         oprB.sipsB   acde      a cde
## 13     permease.sipsA   acde      a cde
## 14     permease.sipsB     de         de
## 15         thiE.sipsA    cde        cde
## 16         thiE.sipsB     de         de
## 17           WT.sipsA    abc      abc  
## 18           WT.sipsB    cde        cde
  cat("N per group")
## N per group
  table(pref_data$tt)
## 
## Hypothetical.sipsA         ALDH.sipsA         nuoA.sipsA         iscS.sipsA 
##                 21                 16                 14                 16 
##     permease.sipsA         oprB.sipsA         thiE.sipsA    NitT/TauT.sipsA 
##                 20                 15                 13                 14 
##           WT.sipsA Hypothetical.sipsB         ALDH.sipsB         nuoA.sipsB 
##                 20                 21                 16                 14 
##         iscS.sipsB     permease.sipsB         oprB.sipsB         thiE.sipsB 
##                 16                 20                 15                 13 
##    NitT/TauT.sipsB           WT.sipsB 
##                 14                 20
  pd3e <- pref_data %>%
    group_by(tt) %>%
    summarize(mean = mean(value), sem= sd(value)/sqrt(dplyr::n())) %>%
    ungroup()
  
  colors2 <- rev(c(rep(c("red","red", "pink","pink"),5)))
  
  pd3e$tt <- gsub(pattern = "sipsA", replacement = "C", x = pd3e$tt)
  pd3e$tt <- gsub(pattern = "sipsB", replacement = "T", x = pd3e$tt)
  
  pd3e$tt <- as.character(pd3e$tt)
  pd3e$tt <- factor(pd3e$tt)  
 
   pd3f <- pd3e %>% 
    inner_join(ghi %>% mutate(tt = gsub(pattern = "sipsA", replacement = "C", x = Group)) %>% mutate(tt = gsub(pattern = "sipsB", replacement = "T", x = tt)) %>% mutate(tt = gsub(pattern = "wp7", replacement = "wp07", x = tt))) 

   pd3f$tt <- factor(pd3f$tt, levels = (c("WT.C","WT.T","NitT/TauT.C","NitT/TauT.T","thiE.C","thiE.T","oprB.C","oprB.T","permease.C","permease.T","iscS.C","iscS.T","nuoA.C","nuoA.T","ALDH.C","ALDH.T","Hypothetical.C","Hypothetical.T")))
  
  plot_text_size = 3
  fig3_sipduration_plot <- ggplot(pd3f, aes(x=tt, y = mean,fill = tt)) +
    geom_bar(stat="identity",size=plot_text_size*1) +
    scale_color_manual(values = colors2) +
    scale_fill_manual(name="Legend",values=colors2) +
    scale_y_continuous(limits = c(0,150))+
    theme_cowplot() +
#   theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.text = element_text(angle = 45), axis.ticks = element_blank()) +
#       theme(text = element_text(size=plot_text_size), axis.title = element_blank(), legend.position = "none", axis.line = element_line(size = 0.2), axis.ticks = element_blank()) +
        theme(legend.position = "none", axis.ticks = element_blank()) +
    geom_errorbar(col = "black", aes(ymax =mean+sem, ymin = mean-sem), width=0.25, size=.375) +
        theme(axis.text.x = element_text(angle = 45, hjust=1)) +#text = element_text(size=32), axis.text = element_text(size=32), 
  # geom_hline(yintercept = 0.5, linetype="dashed") +
    geom_text(aes(label=Letter, y=mean + sem + 3), size=plot_text_size,angle=90, hjust=0) + 
    labs(y = "# of sips", x = "Bacterial mutant")
  fig3_sipduration_plot

x=.6
ggsave(h=5.17*x,w=6.64*x,"fig3-sipdistribution_addfilter.png", bg="white")

Figure 4

matching_file <- data.frame(plate = c("1","2","3","4","5","6","7","8"), type = c("dietandflies","flies","dietnoflies","flies","dietandflies","dietnoflies","flies","dietandflies"), day =c("4-7","4-7","4-8","4-8","4-8","4-10","4-10","4-10"))

GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/Glucose_Protein_Values_JMC.xlsx?raw=true", write_disk(yg1 <- tempfile(fileext = ".xlsx")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/Glucose_Protein_Values_JMC.xlsx]
##   Date: 2022-05-30 23:21
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 24 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea172e4ae0ee.xlsx
GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/PlateA.xlsx?raw=true", write_disk(yg2 <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/PlateA.xlsx]
##   Date: 2022-05-30 23:21
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 13.4 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea17255f53096.xls
GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/PlateB.xlsx?raw=true", write_disk(yg3 <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/PlateB.xlsx]
##   Date: 2022-05-30 23:21
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 11.6 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1725448f3f0.xls
GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/PlateC.xlsx?raw=true", write_disk(yg4 <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/PlateC.xlsx]
##   Date: 2022-05-30 23:21
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 12.3 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1721873ccb.xls
GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/plate1.xlsx?raw=true", write_disk(yg5 <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/plate1.xlsx]
##   Date: 2022-05-30 23:21
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 11.9 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea17255a83435.xls
GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/plate2.xlsx?raw=true", write_disk(yg6 <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/plate2.xlsx]
##   Date: 2022-05-30 23:21
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 11.9 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea17215fbaf82.xls
GET("https://github.com/johnchaston/TBCdietPref/blob/master/files/plate3.xlsx?raw=true", write_disk(yg7 <- tempfile(fileext = ".xls")))
## Response [https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/plate3.xlsx]
##   Date: 2022-05-30 23:21
##   Status: 200
##   Content-Type: application/octet-stream
##   Size: 11.6 kB
## <ON DISK>  /var/folders/kh/d3bt5f393vg2hh5xbqw927bw0000gq/T//RtmpXEhBuV/filea1723ebf8b14.xls
pg_data <- combine_files(yg1,"Plate 1 Glucose","Plate A Protein",yg5,"Plate 1 - Sheet1", yg2,"Plate 1 - Sheet1") %>% 
  rbind(combine_files(yg1,"Plate 2 Glucose","Plate B Protein",yg6,"Plate 2 - Sheet1", yg3,"Plate 1 - Sheet1")) %>%
  rbind(combine_files(yg1,"Plate 3 Glucose","Plate C Protein",yg7,"Plate 3 - Sheet1", yg4,"Plate 1 - Sheet1")) %>% 
    inner_join(matching_file, by = "plate")

pg_data <- pg_data %>% 
    mutate(normalized_glucose = glucose/(protein/2)) %>%
    mutate(treatment = gsub(pattern = "WT",replacement = "54",x = treatment)) %>%
  filter(treatment!=12) %>% 
  droplevels()


#write.csv(pg_data, 'pg_data.csv')
## 
# diet and flies
## 
dietandflies <- pg_data %>% filter(type == "dietandflies") %>% droplevels()

## test if normal - normalized is not, so log-transform
shapiro.test(dietandflies$normalized_glucose)
## 
##  Shapiro-Wilk normality test
## 
## data:  dietandflies$normalized_glucose
## W = 0.9145, p-value = 0.0007362
shapiro.test(dietandflies$glucose)
## 
##  Shapiro-Wilk normality test
## 
## data:  dietandflies$glucose
## W = 0.98357, p-value = 0.6407
shapiro.test(dietandflies$protein)
## 
##  Shapiro-Wilk normality test
## 
## data:  dietandflies$protein
## W = 0.98349, p-value = 0.6372
shapiro.test(log(dietandflies$normalized_glucose+1))
## 
##  Shapiro-Wilk normality test
## 
## data:  log(dietandflies$normalized_glucose + 1)
## W = 0.96963, p-value = 0.1691
dietandflies$treatment <- as.factor(dietandflies$treatment)
def <- lmer(protein ~ treatment + (1|day), dietandflies)
summary(def)
## Linear mixed model fit by REML ['lmerMod']
## Formula: protein ~ treatment + (1 | day)
##    Data: dietandflies
## 
## REML criterion at convergence: 110.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.30274 -0.56566 -0.01275  0.67070  1.76667 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  day      (Intercept) 0.1111   0.3334  
##  Residual             0.3743   0.6118  
## Number of obs: 56, groups:  day, 3
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  3.12831    0.25267  12.381
## treatment19  0.28208    0.23581   1.196
## treatment54 -0.05554    0.23162  -0.240
## treatmentX   0.73814    0.22748   3.245
## 
## Correlation of Fixed Effects:
##             (Intr) trtm19 trtm54
## treatment19 -0.448              
## treatment54 -0.458  0.488       
## treatmentX  -0.466  0.497  0.509
efg <- glht(def, mcp(treatment = "Tukey"))
fgh <- cld(efg)

def <- lmer(glucose ~ treatment + (1|day), dietandflies)
summary(def)
## Linear mixed model fit by REML ['lmerMod']
## Formula: glucose ~ treatment + (1 | day)
##    Data: dietandflies
## 
## REML criterion at convergence: 127.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.47546 -0.71491  0.06046  0.63469  2.15492 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  day      (Intercept) 0.2153   0.4640  
##  Residual             0.5060   0.7114  
## Number of obs: 56, groups:  day, 3
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  7.41018    0.32863  22.549
## treatment19  0.35716    0.27420   1.303
## treatment54 -0.07316    0.26934  -0.272
## treatmentX  -0.22662    0.26451  -0.857
## 
## Correlation of Fixed Effects:
##             (Intr) trtm19 trtm54
## treatment19 -0.400              
## treatment54 -0.410  0.487       
## treatmentX  -0.417  0.497  0.509
efg <- glht(def, mcp(treatment = "Tukey"))
fgh <- cld(efg)

def <- lmer(log(normalized_glucose+1) ~ treatment + (1|day), dietandflies)
summary(def)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(normalized_glucose + 1) ~ treatment + (1 | day)
##    Data: dietandflies
## 
## REML criterion at convergence: -24.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.02797 -0.56382 -0.04824  0.54408  2.68789 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  day      (Intercept) 0.008255 0.09086 
##  Residual             0.027874 0.16696 
## Number of obs: 56, groups:  day, 3
## 
## Fixed effects:
##               Estimate Std. Error t value
## (Intercept)  1.7671699  0.0688999  25.648
## treatment19 -0.0341132  0.0643520  -0.530
## treatment54 -0.0006496  0.0632097  -0.010
## treatmentX  -0.2048926  0.0620789  -3.301
## 
## Correlation of Fixed Effects:
##             (Intr) trtm19 trtm54
## treatment19 -0.448              
## treatment54 -0.459  0.488       
## treatmentX  -0.467  0.497  0.509
efg <- glht(def, mcp(treatment = "Tukey"))
fgh <- cld(efg)

df2 <- dietandflies %>% group_by(treatment) %>% summarize(mprot = mean(protein), sprot = sd(protein) / sqrt(dplyr::n()), mglu = mean(glucose), sglu = sd(glucose) / sqrt(dplyr::n()), mnglu = mean(normalized_glucose), snglu = sd(normalized_glucose) / sqrt(dplyr::n()))

df2$treatment = factor(df2$treatment, levels = c(54, 14, 19, "X"))

cat("Figure S2A")
## Figure S2A
ggplot(df2, aes(x=treatment, y = mprot)) + 
  geom_col() +
  geom_errorbar(aes(ymax = mprot+sprot, ymin = mprot-sprot), position = position_dodge(width = 0.9), width = 0.5) + 
  theme_cowplot() + 
  scale_x_discrete(labels = c(expression(italic("A. fabarum")), expression(italic("oprB")), expression(italic("iscS")),expression("Axenic"))) +
  geom_text(aes(y = (mprot+sprot)*1.05), label = c("a","ab","a","b"), size = 6) + 
  labs(y = bquote(mu*"g protein sample"^-1))

#ggsave("protein_plot.jpg", width = 4, height = 4)

cat("Figure S2B")
## Figure S2B
ggplot(df2, aes(x=treatment, y = mglu)) + 
  geom_col() +
  geom_errorbar(aes(ymax = mglu+sglu, ymin = mglu-sglu), position = position_dodge(width = 0.9), width = 0.5) + 
  theme_cowplot() + 
  scale_x_discrete(labels = c(expression(italic("A. fabarum")), expression(italic("oprB")), expression(italic("iscS")),expression("Axenic"))) +
  geom_text(aes(y = (mglu+sglu)*1.05), label = c("a","a","a","a"), size = 6) + 
  labs(y = bquote(mu*"g glucose sample"^-1))

#ggsave("glucose_plot.jpg", width = 4, height = 4)  
  
cat("Figure 4A")
## Figure 4A
ggplot(df2, aes(x=treatment, y = mnglu)) + 
  geom_col() +
  geom_errorbar(aes(ymax = mnglu+snglu, ymin = mnglu-snglu), position = position_dodge(width = 0.9), width = 0.5) + 
  theme_cowplot() + 
  scale_x_discrete(labels = c(expression(italic("A. fabarum")), expression(italic("oprB")), expression(italic("iscS")),expression("Axenic"))) +
  geom_text(aes(y = (mnglu+snglu)*1.05), label = c("b","b","b","a"), size = 6) + 
  labs(y = bquote(mu*"g glucose"~mu*"g protein"^-1))

#ggsave("normalized_glucose.jpg", width = 4, height = 4)  

##
# Diet, no flies
##

dietnoflies <- pg_data %>% filter(type == "dietnoflies" & treatment != 12) %>% droplevels()

## test if normal - protein is close enough, including considering multiple tests, that we assumed normality for all
shapiro.test(dietnoflies$normalized_glucose)
## 
##  Shapiro-Wilk normality test
## 
## data:  dietnoflies$normalized_glucose
## W = 0.95884, p-value = 0.1631
shapiro.test((dietnoflies$glucose))
## 
##  Shapiro-Wilk normality test
## 
## data:  (dietnoflies$glucose)
## W = 0.98309, p-value = 0.8126
shapiro.test((dietnoflies$protein))
## 
##  Shapiro-Wilk normality test
## 
## data:  (dietnoflies$protein)
## W = 0.93793, p-value = 0.03252
dietnoflies$treatment <- as.factor(dietnoflies$treatment)
def <- lm(protein ~ treatment, dietnoflies)
summary(def)
## 
## Call:
## lm(formula = protein ~ treatment, data = dietnoflies)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4242 -0.4152 -0.1819  0.6515  1.1948 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.6964     0.2167  12.445 2.07e-14 ***
## treatment19  -0.2226     0.3064  -0.726    0.472    
## treatment54  -0.2471     0.3064  -0.806    0.426    
## treatmentX    0.3992     0.3148   1.268    0.213    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6852 on 35 degrees of freedom
## Multiple R-squared:  0.1325, Adjusted R-squared:  0.05812 
## F-statistic: 1.782 on 3 and 35 DF,  p-value: 0.1687
efg <- glht(def, mcp(treatment = "Tukey"))
fgh <- cld(efg)

def <- lm(glucose ~ treatment, dietnoflies)
summary(def)
## 
## Call:
## lm(formula = glucose ~ treatment, data = dietnoflies)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.54206 -0.49726  0.04863  0.41149  1.36572 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   7.4829     0.2280  32.824   <2e-16 ***
## treatment19   0.1148     0.3224   0.356   0.7239    
## treatment54   0.5009     0.3224   1.554   0.1293    
## treatmentX   -0.5765     0.3312  -1.740   0.0906 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7209 on 35 degrees of freedom
## Multiple R-squared:  0.2356, Adjusted R-squared:  0.1701 
## F-statistic: 3.596 on 3 and 35 DF,  p-value: 0.02297
efg <- glht(def, mcp(treatment = "Tukey"))
fgh <- cld(efg)

def <- lm(normalized_glucose ~ treatment, dietnoflies)
summary(def)
## 
## Call:
## lm(formula = normalized_glucose ~ treatment, data = dietnoflies)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7898 -1.5941 -0.0418  1.3142  4.3728 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.0036     0.5664  10.599 1.81e-12 ***
## treatment19   0.4647     0.8011   0.580    0.566    
## treatment54   0.7426     0.8011   0.927    0.360    
## treatmentX   -1.0294     0.8230  -1.251    0.219    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.791 on 35 degrees of freedom
## Multiple R-squared:  0.1315, Adjusted R-squared:  0.05709 
## F-statistic: 1.767 on 3 and 35 DF,  p-value: 0.1714
efg <- glht(def, mcp(treatment = "Tukey"))
fgh <- cld(efg)

df2 <- dietnoflies %>% group_by(treatment) %>% summarize(mprot = mean(protein), sprot = sd(protein) / sqrt(dplyr::n()), mglu = mean(glucose), sglu = sd(glucose) / sqrt(dplyr::n()), mnglu = mean(normalized_glucose), snglu = sd(normalized_glucose) / sqrt(dplyr::n()))

df2$treatment = factor(df2$treatment, levels = c(54, 14, 19, "X"))

cat("Figure S2C")
## Figure S2C
ggplot(df2, aes(x=treatment, y = mprot)) + 
  geom_col() +
  geom_errorbar(aes(ymax = mprot+sprot, ymin = mprot-sprot), position = position_dodge(width = 0.9), width = 0.5) + 
  theme_cowplot() + 
  scale_x_discrete(labels = c(expression(italic("A. fabarum")), expression(italic("oprB")), expression(italic("iscS")),expression("Axenic"))) +
  geom_text(aes(y = (mprot+sprot)*1.05), label = c("a","a","a","a"), size = 6) + 
  labs(y = bquote(mu*"g protein sample"^-1))

#ggsave("protein_plot_noflies.jpg", width = 4, height = 4)

cat("Figure S2D")
## Figure S2D
ggplot(df2, aes(x=treatment, y = mglu)) + 
  geom_col() +
  geom_errorbar(aes(ymax = mglu+sglu, ymin = mglu-sglu), position = position_dodge(width = 0.9), width = 0.5) + 
  theme_cowplot() + 
  scale_x_discrete(labels = c(expression(italic("A. fabarum")), expression(italic("oprB")), expression(italic("iscS")),expression("Axenic"))) +
  geom_text(aes(y = (mglu+sglu)*1.05), label = c("ab","ab","b","a"), size = 6) + 
  labs(y = bquote(mu*"g glucose sample"^-1))

#ggsave("glucose_plot_noflies.jpg", width = 4, height = 4)  
 
cat("Figure 4B") 
## Figure 4B
ggplot(df2, aes(x=treatment, y = mnglu)) + 
  geom_col() +
  geom_errorbar(aes(ymax = mnglu+snglu, ymin = mnglu-snglu), position = position_dodge(width = 0.9), width = 0.5) + 
  theme_cowplot() + 
  scale_x_discrete(labels = c(expression(italic("A. fabarum")), expression(italic("oprB")), expression(italic("iscS")),expression("Axenic"))) +
  geom_text(aes(y = (mnglu+snglu)*1.05), label = c("a","a","a","a"), size = 6) + 
  labs(y = bquote(mu*"g glucose"~mu*"g protein"^-1))

#ggsave("normalized_glucose_plot_noflies.jpg", width = 4, height = 4)  

##
# flies only
##

flies <- pg_data %>% filter(type == "flies" & treatment != 12) %>% droplevels()

## test if normal - normalized is not normal and cannot be transformed normal, so use a Kruskal-Wallis test
flies$normalized_glucose = flies$glucose_perfly / flies$protein_perfly
shapiro.test(flies$normalized_glucose)
## 
##  Shapiro-Wilk normality test
## 
## data:  flies$normalized_glucose
## W = 0.9228, p-value = 0.002385
shapiro.test(log10(flies$normalized_glucose+1))
## 
##  Shapiro-Wilk normality test
## 
## data:  log10(flies$normalized_glucose + 1)
## W = 0.93605, p-value = 0.007766
shapiro.test((flies$glucose_perfly))
## 
##  Shapiro-Wilk normality test
## 
## data:  (flies$glucose_perfly)
## W = 0.97854, p-value = 0.4656
shapiro.test((flies$protein_perfly))
## 
##  Shapiro-Wilk normality test
## 
## data:  (flies$protein_perfly)
## W = 0.97526, p-value = 0.3485
flies$treatment <- as.factor(flies$treatment)
def <- lm(protein_perfly ~ treatment, flies)
summary(def)
## 
## Call:
## lm(formula = protein_perfly ~ treatment, data = flies)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.6894  -5.1507   0.0963   5.5258  16.5278 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   45.720      2.248  20.338   <2e-16 ***
## treatment19   -3.669      3.112  -1.179   0.2442    
## treatment54   -4.623      3.004  -1.539   0.1303    
## treatmentX    -5.966      2.960  -2.016   0.0494 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.456 on 48 degrees of freedom
## Multiple R-squared:  0.08218,    Adjusted R-squared:  0.02481 
## F-statistic: 1.433 on 3 and 48 DF,  p-value: 0.2449
efg <- glht(def, mcp(treatment = "Tukey"))
fgh <- cld(efg)

def <- lm(glucose_perfly ~ treatment, flies)
summary(def)
## 
## Call:
## lm(formula = glucose_perfly ~ treatment, data = flies)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4244 -0.7395 -0.0135  0.6489  2.3140 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   8.6847     0.3257  26.665   <2e-16 ***
## treatment19  -0.9117     0.4509  -2.022   0.0488 *  
## treatment54  -0.8505     0.4352  -1.954   0.0565 .  
## treatmentX    1.0806     0.4288   2.520   0.0151 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.08 on 48 degrees of freedom
## Multiple R-squared:  0.3963, Adjusted R-squared:  0.3586 
## F-statistic:  10.5 on 3 and 48 DF,  p-value: 1.997e-05
efg <- glht(def, mcp(treatment = "Tukey"))
fgh <- cld(efg)

def <- kruskal.test(flies$normalized_glucose ~ as.factor(flies$treatment))
efg <- dunn.test(flies$normalized_glucose,as.factor(flies$treatment), method = "bh", table = F)
##   Kruskal-Wallis rank sum test
## 
## data: x and group
## Kruskal-Wallis chi-squared = 14.9897, df = 3, p-value = 0
## 
## 
## alpha = 0.05
## Reject Ho if p <= alpha/2
ghi <- cldList(comparison = efg$comparisons, p.value = efg$P.adjusted, threshold = 0.05)

df2 <- flies %>% group_by(treatment) %>% summarize(mprot = mean(protein_perfly), sprot = sd(protein_perfly) / sqrt(dplyr::n()), mglu = mean(glucose_perfly), sglu = sd(glucose_perfly) / sqrt(dplyr::n()), mnglu = mean(normalized_glucose), snglu = sd(normalized_glucose) / sqrt(dplyr::n()))

df2$treatment = factor(df2$treatment, levels = c(54, 14, 19, "X"))

cat("Figure S2E")
## Figure S2E
ggplot(df2, aes(x=treatment, y = mprot)) + 
  geom_col() +
  geom_errorbar(aes(ymax = mprot+sprot, ymin = mprot-sprot), position = position_dodge(width = 0.9), width = 0.5) + 
  theme_cowplot() + 
  scale_x_discrete(labels = c(expression(italic("A. fabarum")), expression(italic("oprB")), expression(italic("iscS")),expression("Axenic"))) +
  geom_text(aes(y = (mprot+sprot)*1.05), label = c("a","a","a","a"), size = 6) + 
  labs(y = bquote(mu*"g protein sample"^-1))

#ggsave("protein_plot_flies.jpg", width = 4, height = 4)

cat("Figure S2F")
## Figure S2F
ggplot(df2, aes(x=treatment, y = mglu)) + 
  geom_col() +
  geom_errorbar(aes(ymax = mglu+sglu, ymin = mglu-sglu), position = position_dodge(width = 0.9), width = 0.5) + 
  theme_cowplot() + 
  scale_x_discrete(labels = c(expression(italic("A. fabarum")), expression(italic("oprB")), expression(italic("iscS")),expression("Axenic"))) +
  geom_text(aes(y = (mglu+sglu)*1.05), label = c("ab","a","a","b"), size = 6) + 
  labs(y = bquote(mu*"g glucose sample"^-1))

#ggsave("glucose_plot_flies.jpg", width = 4, height = 4)  
 
cat("Figure 4C")
## Figure 4C
ggplot(df2, aes(x=treatment, y = mnglu)) + 
  geom_col() +
  geom_errorbar(aes(ymax = mnglu+snglu, ymin = mnglu-snglu), position = position_dodge(width = 0.9), width = 0.5) + 
  theme_cowplot() + 
  scale_x_discrete(labels = c(expression(italic("A. fabarum")), expression(italic("oprB")), expression(italic("iscS")),expression("Axenic"))) +
  geom_text(aes(y = (mnglu+snglu)*1.05), label = c("a","a","a","b"), size = 6) + 
  labs(y = bquote(mu*"g glucose"~mu*"g protein"^-1))

#ggsave("normalized_glucose_plot_flies.jpg", width = 4, height = 4)  

Figure S8

## read in your file here, need to change the filepath
traits <- read_xlsx('~/Dropbox/mec15344-sup-0002-DatasetsS1-S14.xlsx', sheet = "Dataset S7", skip = 2, na = "NA")
  ## get pref data
pd3e <- read.csv(url('https://raw.githubusercontent.com/johnchaston/TBCdietPref/master/files/fig2-pd3e.csv'))

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = c("1_1","11cb6_11cb6","11cb7_11cb7","11ct2_11ct2","13_13","137_137","15_15","16_16","181_181","196_196","1ab7_1ab7","28_28","29_29","3_3","30_30","31_31","34_34","35_35","39_39","4_4","43_43","45_45","5_5","52_52","56_56","6_6","66_66","69_69","70_70","73_73","75_75","98_98"))

pd3e$abc2.x <- plyr::revalue(pd3e$abc2.x, c("1_1" = "lbrc",
                                            "11cb6_11cb6" = "wp07",
                                            "11cb7_11cb7" = "wp13",
                                            "11ct2_11ct2"="asl5",
                                            "13_13"="bsub", 
                                            "137_137" = "lc37", 
                                            "15_15" = "ecok", 
                                            "16_16" = "pput", 
                                            "181_181" = "lpar", 
                                            "196_196" = "llcs",
                                            "1ab7_1ab7" = "wp18", 
                                            "28_28" = "atrn",
                                            "29_29" = "gfra", 
                                            "3_3" = "lfrc",
                                            "30_30" = "apan", 
                                            "31_31" = "gxyl",
                                            "34_34" = "apnb",
                                            "35_35" = "aace",
                                            "39_39" = "apa3",
                                            "4_4" = "apoc",
                                            "43_43" = "aci5",
                                            "45_45" = "aori",
                                            "5_5" = "atrc",
                                            "52_52" = "cint",
                                            "56_56" = "galb",
                                            "6_6" = "amac",
                                            "66_66" = "lmli",
                                            "69_69" = "lfal",
                                            "70_70" = "lfrk",
                                            "73_73" = "lbuc",
                                            "75_75" = "lplw",
                                            "98_98" = "lsui")
                             )
                             

pd3e$abc2.x <- factor(pd3e$abc2.x, levels = rev(c("atrn","atrc","amac","aori","aci5","aace","apan","apa3","asl5","apnb","apoc","gfra","galb","gxyl","cint","ecok","pput","lmli","lplw","lpar","lbrc","lbuc","lfrk","lfrc","llcs","wp07","wp13","wp18","lfal","lsui","lc37","bsub")))

pd3e <- pd3e %>% arrange(abc2.x)

pd4 <- pd3e %>% inner_join(traits, by = c("abc2.x" = "treatment"))

eee <- cor.test(pd4$mean,pd4$tgl_fly, method = "spearman")

cat("Correlation test results")
## Correlation test results
tgl <- ggplot(pd4, aes(x = mean, y = tgl_fly, color = color)) + 
  geom_point(size = 2) +
  scale_color_identity() +
  stat_smooth(aes(group = 1), method = "lm", alpha = 0, formula = y ~ x, color = "black") + 
  theme_cowplot() + 
  labs(x = "DP index", y = bquote(mu*g~triacylglycerides~fly^-1)) +
  annotate(y = 9, x=.2, geom="text", label=paste0("S = ",round(cor.test(pd4$mean,pd4$tgl_fly, method = "spearman")$statistic,2),"; df = ", cor.test(pd4$mean,pd4$tgl_fly)$parameter,"; p = ", round(cor.test(pd4$mean,pd4$tgl_fly, method = "spearman")$p.value,2)), size =4, hjust = 0)
tgl

starve <- ggplot(pd4, aes(x = mean, y = starve, color = color)) + 
  geom_point(size = 2) +
  scale_color_identity() +
  stat_smooth(aes(group = 1), method = "lm", alpha = 0, formula = y ~ x, color = "black") + 
  theme_cowplot() + 
  labs(x = "DP index", y = "time (d)") + 
    annotate(y = 2.75, x=.2, geom="text", label=paste0("S = ",round(cor.test(pd4$mean,pd4$starve, method = "spearman")$statistic,2),"; df = ", cor.test(pd4$mean,pd4$starve)$parameter,"; p = ", round(cor.test(pd4$mean,pd4$starve, method = "spearman")$p.value,2)), size = 4, hjust = 0)
starve

lifespan <- ggplot(pd4, aes(x = mean, y = lifespan, color = color)) + 
  geom_point(size = 2) +
  scale_color_identity() +
  stat_smooth(aes(group = 1), method = "lm", alpha = 0, formula = y ~ x, color = "black") + 
  theme_cowplot() + 
  labs(x = "DP index", y = "time (d)")+ 
    annotate(y = 38, x=.2, geom="text", label=paste0("S = ",round(cor.test(pd4$mean,pd4$lifespan, method = "spearman")$statistic,2),"; df = ", cor.test(pd4$mean,pd4$lifespan)$parameter,"; p = ", round(cor.test(pd4$mean,pd4$lifespan, method = "spearman")$p.value,2)), size = 4, hjust = 0)
lifespan

dev_time <- ggplot(pd4, aes(x = mean, y = dev_time, color = color)) + 
  geom_point(size = 2) +
  scale_color_identity() +
  stat_smooth(aes(group = 1), method = "lm", alpha = 0, formula = y ~ x, color = "black") + 
  theme_cowplot() + 
  labs(x = "DP index", y = bquote(time~(d^-1)))+ 
    annotate(y = .232, x=.2, geom="text", label=paste0("S = ",round(cor.test(pd4$mean,pd4$dev_time, method = "spearman")$statistic,2),"; df = ", cor.test(pd4$mean,pd4$dev_time)$parameter,"; p = ", round(cor.test(pd4$mean,pd4$dev_time, method = "spearman")$p.value,2)), size = 4, hjust = 0)
dev_time

fecundity <- ggplot(pd4, aes(x = mean, y = fecundity, color = color)) + 
  geom_point(size = 2) +
  scale_color_identity() +
  stat_smooth(aes(group = 1), method = "lm", alpha = 0, formula = y ~ x, color = "black") + 
  theme_cowplot() + 
  labs(x = "DP index", y = bquote(offspring~hr-1))+ 
    annotate(y =1, x=.2, geom="text", label=paste0("S = ",round(cor.test(pd4$mean,pd4$fecundity, method = "spearman")$statistic,2),"; df = ", cor.test(pd4$mean,pd4$fecundity)$parameter,"; p = ", round(cor.test(pd4$mean,pd4$fecundity, method = "spearman")$p.value,2)), size = 4, hjust = 0)
fecundity

feeding_rate <- ggplot(pd4, aes(x = mean, y = feeding_rate, color = color)) + 
  geom_point(size = 2) +
  scale_color_identity() +
  stat_smooth(aes(group = 1), method = "lm", alpha = 0, formula = y ~ x, color = "black") + 
  theme_cowplot() + 
  labs(x = "DP index", y = bquote(mu*g~food~30~min^-1)) +
    annotate(y = .16, x=.2, geom="text", label=paste0("S = ",round(cor.test(pd4$mean,pd4$feeding_rate, method = "spearman")$statistic,2),"; df = ", cor.test(pd4$mean,pd4$feeding_rate)$parameter,"; p = ", round(cor.test(pd4$mean,pd4$feeding_rate, method = "spearman")$p.value,2)), size = 4, hjust = 0)
feeding_rate

glucose <- ggplot(pd4, aes(x = mean, y = glucose, color = color)) + 
  geom_point(size = 2) +
  scale_color_identity() +
  stat_smooth(aes(group = 1), method = "lm", alpha = 0, formula = y ~ x, color = "black") + 
  theme_cowplot() + 
  labs(x = "DP index", y = bquote(mu*g~glucose~fly^-1)) +
    annotate(y = 11, x=.2, geom="text", label=paste0("S = ",round(cor.test(pd4$mean,pd4$glucose, method = "spearman")$statistic,2),"; df = ", cor.test(pd4$mean,pd4$glucose)$parameter,"; p = ", round(cor.test(pd4$mean,pd4$glucose, method = "spearman")$p.value,2)), size = 4, hjust = 0)
glucose

## Use to print to figure.
# jpeg(h=9, w=9, "trait_plot.jpg", units = "in", res = 300)
#   grid.arrange(   
#       tgl+labs(tag="A"), starve+labs(tag="B"), lifespan+labs(tag="C"),
#       dev_time+labs(tag="D"), fecundity+labs(tag="E"), feeding_rate+labs(tag="F"), ## 1,2,3,4
#       glucose+labs(tag="G"),
#       widths = c(3,3,3),  
#       heights = c(3,3,3), 
#       layout_matrix=rbind(    
#         c(1,2,3),
#         c(4,5,6),
#         c(7,NA,NA)
#         )
#       )
# dev.off()